[llvm] [BOLT] support mold linker generated PLT in disassembling (PR #115256)

Patrick Zhang via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 14 22:51:03 PST 2024


https://github.com/patrickphzhang updated https://github.com/llvm/llvm-project/pull/115256

>From be514ed8fd8a0f4be6726d320dc22fd8f9484702 Mon Sep 17 00:00:00 2001
From: patphzhang <patphzhang at tencent.com>
Date: Thu, 7 Nov 2024 11:13:49 +0800
Subject: [PATCH 1/7] [BOLT] support mold linker generated PLT in disassembling

---
 bolt/include/bolt/Utils/CommandLineOpts.h |   1 +
 bolt/lib/Rewrite/RewriteInstance.cpp      |  30 +-
 bolt/lib/Utils/CommandLineOpts.cpp        |   6 +
 bolt/test/X86/Inputs/plt-mold-header.yaml | 399 ++++++++++++++++++++++
 bolt/test/X86/plt-mold-header.test        |   7 +
 5 files changed, 442 insertions(+), 1 deletion(-)
 create mode 100644 bolt/test/X86/Inputs/plt-mold-header.yaml
 create mode 100644 bolt/test/X86/plt-mold-header.test

diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h
index 04bf7db5de9527..3b0c0db1bd089e 100644
--- a/bolt/include/bolt/Utils/CommandLineOpts.h
+++ b/bolt/include/bolt/Utils/CommandLineOpts.h
@@ -34,6 +34,7 @@ extern llvm::cl::opt<bool> AggregateOnly;
 extern llvm::cl::opt<unsigned> BucketsPerLine;
 extern llvm::cl::opt<bool> DiffOnly;
 extern llvm::cl::opt<bool> EnableBAT;
+extern llvm::cl::opt<bool> UseMold;
 extern llvm::cl::opt<bool> EqualizeBBCounts;
 extern llvm::cl::opt<bool> RemoveSymtab;
 extern llvm::cl::opt<unsigned> ExecutionCountThreshold;
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 32ec7abe8b666a..a7118be5dc263a 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -1672,7 +1672,35 @@ void RewriteInstance::disassemblePLTSectionX86(BinarySection &Section,
   const uint64_t SectionAddress = Section.getAddress();
   const uint64_t SectionSize = Section.getSize();
 
-  for (uint64_t EntryOffset = 0; EntryOffset + EntrySize <= SectionSize;
+  uint64_t EntryStartOffset = 0;
+  if (opts::UseMold) {
+    // The mold linker (https://github.com/rui314/mold/blob/v2.34.1/src/arch-x86-64.cc#L50)
+    // generates a unique format for the PLT.
+    // The first entry of the mold-style PLT is 32 bytes long, while the remaining entries
+    // are 16 bytes long. We need to parse the first entry with a special offset limit setting.
+    uint64_t HeaderSize = 32;
+    outs() << "BOLT-INFO: parsing PLT header for mold\n";
+    MCInst Instruction;
+    uint64_t InstrSize, InstrOffset = EntryStartOffset;
+    while (InstrOffset < HeaderSize) {
+      disassemblePLTInstruction(Section, InstrOffset, Instruction, InstrSize);
+      if (BC->MIB->isIndirectBranch(Instruction))
+        break;
+      InstrOffset += InstrSize;
+    }
+    uint64_t TargetAddress;
+    if (!BC->MIB->evaluateMemOperandTarget(Instruction, TargetAddress,
+                                            SectionAddress + InstrOffset,
+                                            InstrSize)) {
+      errs() << "BOLT-ERROR: error evaluating PLT instruction for the mold header at offset 0x"
+                  << Twine::utohexstr(SectionAddress + InstrOffset) << '\n';
+      exit(1);
+    }
+    createPLTBinaryFunction(TargetAddress, SectionAddress, HeaderSize);
+    EntryStartOffset += HeaderSize;
+  }
+
+  for (uint64_t EntryOffset = EntryStartOffset; EntryOffset + EntrySize <= SectionSize;
        EntryOffset += EntrySize) {
     MCInst Instruction;
     uint64_t InstrSize, InstrOffset = EntryOffset;
diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp
index de82420a167131..356e530c9ca361 100644
--- a/bolt/lib/Utils/CommandLineOpts.cpp
+++ b/bolt/lib/Utils/CommandLineOpts.cpp
@@ -72,6 +72,12 @@ EnableBAT("enable-bat",
   cl::ZeroOrMore,
   cl::cat(BoltCategory));
 
+cl::opt<bool> UseMold("use-mold",
+  cl::desc("the binary is generated by the mold linker"),
+  cl::init(false),
+  cl::ZeroOrMore,
+  cl::cat(BoltCategory));
+
 cl::opt<bool> EqualizeBBCounts(
     "equalize-bb-counts",
     cl::desc("use same count for BBs that should have equivalent count (used "
diff --git a/bolt/test/X86/Inputs/plt-mold-header.yaml b/bolt/test/X86/Inputs/plt-mold-header.yaml
new file mode 100644
index 00000000000000..be6eabeccbba8f
--- /dev/null
+++ b/bolt/test/X86/Inputs/plt-mold-header.yaml
@@ -0,0 +1,399 @@
+--- !ELF
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_DYN
+  Machine:         EM_X86_64
+  Entry:           0x13D0
+ProgramHeaders:
+  - Type:            PT_PHDR
+    Flags:           [ PF_R ]
+    VAddr:           0x40
+    Align:           0x8
+  - Type:            PT_INTERP
+    Flags:           [ PF_R ]
+    FirstSec:        .interp
+    LastSec:         .interp
+    VAddr:           0x270
+  - Type:            PT_LOAD
+    Flags:           [ PF_R ]
+    FirstSec:        .interp
+    LastSec:         .rodata.str
+    Align:           0x1000
+  - Type:            PT_LOAD
+    Flags:           [ PF_X, PF_R ]
+    FirstSec:        .plt
+    LastSec:         .text
+    VAddr:           0x13A0
+    Align:           0x1000
+  - Type:            PT_LOAD
+    Flags:           [ PF_W, PF_R ]
+    FirstSec:        .dynamic
+    LastSec:         .relro_padding
+    VAddr:           0x23F8
+    Align:           0x1000
+  - Type:            PT_LOAD
+    Flags:           [ PF_W, PF_R ]
+    FirstSec:        .got.plt
+    LastSec:         .got.plt
+    VAddr:           0x3550
+    Align:           0x1000
+  - Type:            PT_DYNAMIC
+    Flags:           [ PF_W, PF_R ]
+    FirstSec:        .dynamic
+    LastSec:         .dynamic
+    VAddr:           0x23F8
+    Align:           0x8
+  - Type:            PT_GNU_EH_FRAME
+    Flags:           [ PF_R ]
+    FirstSec:        .eh_frame_hdr
+    LastSec:         .eh_frame_hdr
+    VAddr:           0x37C
+    Align:           0x4
+  - Type:            PT_GNU_STACK
+    Flags:           [ PF_W, PF_R ]
+  - Type:            PT_GNU_RELRO
+    Flags:           [ PF_R ]
+    FirstSec:        .dynamic
+    LastSec:         .relro_padding
+    VAddr:           0x23F8
+Sections:
+  - Name:            .interp
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x270
+    AddressAlign:    0x1
+    Content:         2F6C696236342F6C642D6C696E75782D7838362D36342E736F2E3200
+  - Name:            .gnu.hash
+    Type:            SHT_GNU_HASH
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x290
+    Link:            .dynsym
+    AddressAlign:    0x8
+    Header:
+      SymNdx:          0x2
+      Shift2:          0x1A
+    BloomFilter:     [ 0x0 ]
+    HashBuckets:     [ 0x0 ]
+    HashValues:      [  ]
+  - Name:            .dynsym
+    Type:            SHT_DYNSYM
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2B0
+    Link:            .dynstr
+    AddressAlign:    0x8
+  - Name:            .dynstr
+    Type:            SHT_STRTAB
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2E0
+    AddressAlign:    0x1
+  - Name:            .gnu.version
+    Type:            SHT_GNU_versym
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x2FE
+    Link:            .dynsym
+    AddressAlign:    0x2
+    Entries:         [ 0, 2 ]
+  - Name:            .gnu.version_r
+    Type:            SHT_GNU_verneed
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x308
+    Link:            .dynstr
+    AddressAlign:    0x8
+    Dependencies:
+      - Version:         1
+        File:            libc.so.6
+        Entries:
+          - Name:            GLIBC_2.2.5
+            Hash:            157882997
+            Flags:           0
+            Other:           2
+  - Name:            .rela.plt
+    Type:            SHT_RELA
+    Flags:           [ SHF_ALLOC, SHF_INFO_LINK ]
+    Address:         0x328
+    Link:            .dynsym
+    AddressAlign:    0x8
+    Info:            .got.plt
+    Relocations:
+      - Offset:          0x3568
+        Symbol:          printf
+        Type:            R_X86_64_JUMP_SLOT
+  - Name:            .eh_frame
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x340
+    AddressAlign:    0x8
+    Content:         1400000000000000017A5200017810011B0C0708900100001C0000001C000000701000002500000000410E108602430D06600C070800000000000000
+  - Name:            .eh_frame_hdr
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x37C
+    AddressAlign:    0x4
+    Content:         011B033BC0FFFFFF0100000054100000DCFFFFFF
+  - Name:            .rodata.str
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x390
+    AddressAlign:    0x1
+    Content:         48656C6C6F20776F726C64210A00
+  - Name:            .plt
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x13A0
+    AddressAlign:    0x10
+    Content:         F30F1EFA4153FF35CC3C1602FF25CE3C1602CCCCCCCCCCCCCCCCCCCCCCCCCCCCF30F1EFA41BB00000000FF2598210000
+  - Name:            .text
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x13D0
+    AddressAlign:    0x10
+    Content:         554889E54883EC10C745FC00000000488D3DAAEFFFFFB000E8D3FFFFFF31C04883C4105DC3
+  - Name:            .dynamic
+    Type:            SHT_DYNAMIC
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x23F8
+    Link:            .dynstr
+    AddressAlign:    0x8
+    Entries:
+      - Tag:             DT_NEEDED
+        Value:           0x1
+      - Tag:             DT_JMPREL
+        Value:           0x328
+      - Tag:             DT_PLTRELSZ
+        Value:           0x18
+      - Tag:             DT_PLTREL
+        Value:           0x7
+      - Tag:             DT_PLTGOT
+        Value:           0x3550
+      - Tag:             DT_SYMTAB
+        Value:           0x2B0
+      - Tag:             DT_SYMENT
+        Value:           0x18
+      - Tag:             DT_STRTAB
+        Value:           0x2E0
+      - Tag:             DT_STRSZ
+        Value:           0x1E
+      - Tag:             DT_VERSYM
+        Value:           0x2FE
+      - Tag:             DT_VERNEED
+        Value:           0x308
+      - Tag:             DT_VERNEEDNUM
+        Value:           0x1
+      - Tag:             DT_GNU_HASH
+        Value:           0x290
+      - Tag:             DT_FLAGS_1
+        Value:           0x8000000
+      - Tag:             DT_DEBUG
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+  - Name:            .got
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x2548
+    AddressAlign:    0x8
+    Content:         '0000000000000000'
+  - Name:            .relro_padding
+    Type:            SHT_NOBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x2550
+    AddressAlign:    0x1
+    Size:            0xAB0
+  - Name:            .got.plt
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x3550
+    AddressAlign:    0x8
+    Content:         F82300000000000000000000000000000000000000000000A013000000000000
+  - Name:            .rela.text
+    Type:            SHT_RELA
+    Flags:           [ SHF_INFO_LINK ]
+    Link:            .symtab
+    AddressAlign:    0x8
+    Info:            .text
+    Relocations:
+      - Offset:          0x13E2
+        Symbol:          .L.str
+        Type:            R_X86_64_PC32
+        Addend:          -4
+      - Offset:          0x13E9
+        Symbol:          printf
+        Type:            R_X86_64_PLT32
+        Addend:          -4
+  - Type:            SectionHeaderTable
+    Sections:
+      - Name:            .interp
+      - Name:            .gnu.hash
+      - Name:            .dynsym
+      - Name:            .dynstr
+      - Name:            .gnu.version
+      - Name:            .gnu.version_r
+      - Name:            .rela.plt
+      - Name:            .eh_frame
+      - Name:            .eh_frame_hdr
+      - Name:            .rodata.str
+      - Name:            .plt
+      - Name:            .text
+      - Name:            .rela.text
+      - Name:            .dynamic
+      - Name:            .got
+      - Name:            .relro_padding
+      - Name:            .got.plt
+      - Name:            .symtab
+      - Name:            .strtab
+      - Name:            .shstrtab
+Symbols:
+  - Name:            .interp
+    Type:            STT_SECTION
+    Section:         .interp
+    Value:           0x270
+  - Name:            .gnu.hash
+    Type:            STT_SECTION
+    Section:         .gnu.hash
+    Value:           0x290
+  - Name:            .dynsym
+    Type:            STT_SECTION
+    Section:         .dynsym
+    Value:           0x2B0
+  - Name:            .dynstr
+    Type:            STT_SECTION
+    Section:         .dynstr
+    Value:           0x2E0
+  - Name:            .gnu.version
+    Type:            STT_SECTION
+    Section:         .gnu.version
+    Value:           0x2FE
+  - Name:            .gnu.version_r
+    Type:            STT_SECTION
+    Section:         .gnu.version_r
+    Value:           0x308
+  - Name:            .rela.plt
+    Type:            STT_SECTION
+    Section:         .rela.plt
+    Value:           0x328
+  - Name:            .eh_frame
+    Type:            STT_SECTION
+    Section:         .eh_frame
+    Value:           0x340
+  - Name:            .eh_frame_hdr
+    Type:            STT_SECTION
+    Section:         .eh_frame_hdr
+    Value:           0x37C
+  - Name:            .rodata.str
+    Type:            STT_SECTION
+    Section:         .rodata.str
+    Value:           0x390
+  - Name:            .plt
+    Type:            STT_SECTION
+    Section:         .plt
+    Value:           0x13A0
+  - Name:            .text
+    Type:            STT_SECTION
+    Section:         .text
+    Value:           0x13D0
+  - Name:            .dynamic
+    Type:            STT_SECTION
+    Section:         .dynamic
+    Value:           0x23F8
+  - Name:            .got
+    Type:            STT_SECTION
+    Section:         .got
+    Value:           0x2548
+  - Name:            .relro_padding
+    Type:            STT_SECTION
+    Section:         .relro_padding
+    Value:           0x2550
+  - Name:            .got.plt
+    Type:            STT_SECTION
+    Section:         .got.plt
+    Value:           0x3550
+  - Name:            'printf$plt'
+    Type:            STT_FUNC
+    Section:         .plt
+    Value:           0x13C0
+  - Name:            hello.c
+    Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            .L.str
+    Type:            STT_OBJECT
+    Section:         .rodata.str
+    Value:           0x390
+  - Name:            main
+    Type:            STT_FUNC
+    Section:         .text
+    Value:           0x13D0
+    Size:            0x25
+  - Name:            __ehdr_start
+    Section:         .interp
+  - Name:            __init_array_start
+    Index:           SHN_ABS
+  - Name:            __init_array_end
+    Index:           SHN_ABS
+  - Name:            __fini_array_start
+    Index:           SHN_ABS
+  - Name:            __fini_array_end
+    Index:           SHN_ABS
+  - Name:            __preinit_array_start
+    Index:           SHN_ABS
+  - Name:            __preinit_array_end
+    Index:           SHN_ABS
+  - Name:            _DYNAMIC
+    Section:         .dynamic
+    Value:           0x23F8
+  - Name:            _GLOBAL_OFFSET_TABLE_
+    Section:         .got.plt
+    Value:           0x3550
+  - Name:            _PROCEDURE_LINKAGE_TABLE_
+    Section:         .plt
+    Value:           0x13A0
+  - Name:            __bss_start
+    Index:           SHN_ABS
+  - Name:            _end
+    Section:         .got.plt
+    Value:           0x3570
+  - Name:            _etext
+    Section:         .text
+    Value:           0x13F5
+  - Name:            _edata
+    Section:         .got.plt
+    Value:           0x3570
+  - Name:            __executable_start
+    Section:         .interp
+  - Name:            __rela_iplt_start
+    Index:           SHN_ABS
+  - Name:            __rela_iplt_end
+    Index:           SHN_ABS
+  - Name:            __GNU_EH_FRAME_HDR
+    Section:         .eh_frame_hdr
+    Value:           0x37C
+  - Name:            end
+    Section:         .got.plt
+    Value:           0x3570
+  - Name:            etext
+    Section:         .text
+    Value:           0x13F5
+  - Name:            edata
+    Section:         .got.plt
+    Value:           0x3570
+  - Name:            __dso_handle
+    Section:         .interp
+  - Name:            _TLS_MODULE_BASE_
+    Section:         .interp
+  - Name:            printf
+    Binding:         STB_GLOBAL
+DynamicSymbols:
+  - Name:            printf
+    Type:            STT_FUNC
+    Binding:         STB_GLOBAL
+...
diff --git a/bolt/test/X86/plt-mold-header.test b/bolt/test/X86/plt-mold-header.test
new file mode 100644
index 00000000000000..8cbbed8711cbce
--- /dev/null
+++ b/bolt/test/X86/plt-mold-header.test
@@ -0,0 +1,7 @@
+# RUN: yaml2obj %p/Inputs/plt-mold-header.yaml &> %t.exe
+# RUN: llvm-bolt -use-mold %t.exe --print-cfg --print-only=main.* -o %t.out | FileCheck %s
+
+## Check that llvm-bolt correctly parses PLT header created by mold linker.
+## Without the '-use-mold' option, "BOLT-ERROR: unable to disassemble instruction in PLT section .plt at offset 0x10" will be reported.
+## The only call instruction in main() should be a call to printf() in PLT.
+CHECK:  callq "printf$plt

>From fd9dc20eb9e5da903ea593df7c8a774dc481414a Mon Sep 17 00:00:00 2001
From: patphzhang <patphzhang at tencent.com>
Date: Fri, 8 Nov 2024 16:16:02 +0800
Subject: [PATCH 2/7] [BOLT] support mold linker generated PLT in disassembling
 without the new option

---
 bolt/include/bolt/Core/MCPlusBuilder.h    |  5 +++
 bolt/include/bolt/Utils/CommandLineOpts.h |  1 -
 bolt/lib/Rewrite/RewriteInstance.cpp      | 54 ++++++++++-------------
 bolt/lib/Target/X86/X86MCPlusBuilder.cpp  | 23 ++++++++++
 bolt/lib/Utils/CommandLineOpts.cpp        |  6 ---
 5 files changed, 52 insertions(+), 37 deletions(-)

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index 32eda0b283b883..2cc94c52f802de 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -1495,6 +1495,11 @@ class MCPlusBuilder {
     return 0;
   }
 
+  virtual bool isMoldPLTHeader(std::vector<MCInst *> &Insns) const {
+    llvm_unreachable("not implemented");
+    return false;
+  }
+
   virtual bool analyzeVirtualMethodCall(InstructionIterator Begin,
                                         InstructionIterator End,
                                         std::vector<MCInst *> &MethodFetchInsns,
diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h
index 3b0c0db1bd089e..04bf7db5de9527 100644
--- a/bolt/include/bolt/Utils/CommandLineOpts.h
+++ b/bolt/include/bolt/Utils/CommandLineOpts.h
@@ -34,7 +34,6 @@ extern llvm::cl::opt<bool> AggregateOnly;
 extern llvm::cl::opt<unsigned> BucketsPerLine;
 extern llvm::cl::opt<bool> DiffOnly;
 extern llvm::cl::opt<bool> EnableBAT;
-extern llvm::cl::opt<bool> UseMold;
 extern llvm::cl::opt<bool> EqualizeBBCounts;
 extern llvm::cl::opt<bool> RemoveSymtab;
 extern llvm::cl::opt<unsigned> ExecutionCountThreshold;
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index a7118be5dc263a..831880233b3acf 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -1672,35 +1672,7 @@ void RewriteInstance::disassemblePLTSectionX86(BinarySection &Section,
   const uint64_t SectionAddress = Section.getAddress();
   const uint64_t SectionSize = Section.getSize();
 
-  uint64_t EntryStartOffset = 0;
-  if (opts::UseMold) {
-    // The mold linker (https://github.com/rui314/mold/blob/v2.34.1/src/arch-x86-64.cc#L50)
-    // generates a unique format for the PLT.
-    // The first entry of the mold-style PLT is 32 bytes long, while the remaining entries
-    // are 16 bytes long. We need to parse the first entry with a special offset limit setting.
-    uint64_t HeaderSize = 32;
-    outs() << "BOLT-INFO: parsing PLT header for mold\n";
-    MCInst Instruction;
-    uint64_t InstrSize, InstrOffset = EntryStartOffset;
-    while (InstrOffset < HeaderSize) {
-      disassemblePLTInstruction(Section, InstrOffset, Instruction, InstrSize);
-      if (BC->MIB->isIndirectBranch(Instruction))
-        break;
-      InstrOffset += InstrSize;
-    }
-    uint64_t TargetAddress;
-    if (!BC->MIB->evaluateMemOperandTarget(Instruction, TargetAddress,
-                                            SectionAddress + InstrOffset,
-                                            InstrSize)) {
-      errs() << "BOLT-ERROR: error evaluating PLT instruction for the mold header at offset 0x"
-                  << Twine::utohexstr(SectionAddress + InstrOffset) << '\n';
-      exit(1);
-    }
-    createPLTBinaryFunction(TargetAddress, SectionAddress, HeaderSize);
-    EntryStartOffset += HeaderSize;
-  }
-
-  for (uint64_t EntryOffset = EntryStartOffset; EntryOffset + EntrySize <= SectionSize;
+  for (uint64_t EntryOffset = 0; EntryOffset + EntrySize <= SectionSize;
        EntryOffset += EntrySize) {
     MCInst Instruction;
     uint64_t InstrSize, InstrOffset = EntryOffset;
@@ -1717,8 +1689,30 @@ void RewriteInstance::disassemblePLTSectionX86(BinarySection &Section,
       InstrOffset += InstrSize;
     }
 
-    if (InstrOffset + InstrSize > EntryOffset + EntrySize)
+    if (InstrOffset + InstrSize > EntryOffset + EntrySize) {
+      // Check if it is a mold header before rolling back because the mold linker generates
+      // a unique format. The header entry of the mold-style PLT is 32 bytes long, while the
+      // remaining entries are 16 bytes long. We need to skip the header entry.
+      uint64_t HeaderOffset = 0, MoldHeaderSize = 32;
+      if (EntryOffset == HeaderOffset && SectionSize >= MoldHeaderSize) {
+        std::vector<MCInst *> Insns;
+        MCInst Instructions[32]; // 32 insns at most
+        uint32_t Index = 0;
+        while (HeaderOffset < MoldHeaderSize) {
+          disassemblePLTInstruction(Section, HeaderOffset, Instructions[Index], InstrSize);
+          Insns.push_back(&Instructions[Index]);
+          HeaderOffset += InstrSize;
+          Index++;
+        }
+        // if it is a mold header, skip it
+        if (BC->MIB->isMoldPLTHeader(Insns)) {
+          BC->outs() << "BOLT-INFO: parsing the PLT of the mold linker\n";
+          EntryOffset += EntrySize;
+        }
+          
+      }
       continue;
+    }
 
     uint64_t TargetAddress;
     if (!BC->MIB->evaluateMemOperandTarget(Instruction, TargetAddress,
diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
index 63086c06d74fd9..215380085deb01 100644
--- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
+++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
@@ -2127,6 +2127,29 @@ class X86MCPlusBuilder : public MCPlusBuilder {
     return Type;
   }
 
+  /// Analyze a series of insns that match the PLT header of the mold linker
+  /// (https://github.com/rui314/mold/blob/v2.34.1/src/arch-x86-64.cc#L50).
+  /// The size of the header is 32 bytes and the format is as follows:
+  ///   endbr64
+  ///   push %r11
+  ///   push GOTPLT+8(%rip)
+  ///   jmp *GOTPLT+16(%rip)
+  ///   padding (14 bytes)
+  ///
+  bool isMoldPLTHeader(std::vector<MCInst *> &Insns) const override {
+    if (Insns.size() != 18)
+      return false;
+      
+    if (!isTerminateBranch(*Insns[0]) || !isPush(*Insns[1])
+        || !isPush(*Insns[2]) || !isIndirectBranch(*Insns[3]))
+      return false;
+      
+    for (unsigned int i = 4; i < 18; ++i)
+      if (Insns[i]->getOpcode() != X86::INT3)
+        return false;
+    return true;
+  }
+
   /// Analyze a callsite to see if it could be a virtual method call.  This only
   /// checks to see if the overall pattern is satisfied, it does not guarantee
   /// that the callsite is a true virtual method call.
diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp
index 356e530c9ca361..de82420a167131 100644
--- a/bolt/lib/Utils/CommandLineOpts.cpp
+++ b/bolt/lib/Utils/CommandLineOpts.cpp
@@ -72,12 +72,6 @@ EnableBAT("enable-bat",
   cl::ZeroOrMore,
   cl::cat(BoltCategory));
 
-cl::opt<bool> UseMold("use-mold",
-  cl::desc("the binary is generated by the mold linker"),
-  cl::init(false),
-  cl::ZeroOrMore,
-  cl::cat(BoltCategory));
-
 cl::opt<bool> EqualizeBBCounts(
     "equalize-bb-counts",
     cl::desc("use same count for BBs that should have equivalent count (used "

>From 8c51da24960b673d242247a3af8e486bac8a2b78 Mon Sep 17 00:00:00 2001
From: patphzhang <patphzhang at tencent.com>
Date: Fri, 8 Nov 2024 16:33:15 +0800
Subject: [PATCH 3/7] [BOLT] removed the mold option in the test case

---
 bolt/test/X86/plt-mold-header.test | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/bolt/test/X86/plt-mold-header.test b/bolt/test/X86/plt-mold-header.test
index 8cbbed8711cbce..563e15e22d6642 100644
--- a/bolt/test/X86/plt-mold-header.test
+++ b/bolt/test/X86/plt-mold-header.test
@@ -1,7 +1,6 @@
 # RUN: yaml2obj %p/Inputs/plt-mold-header.yaml &> %t.exe
-# RUN: llvm-bolt -use-mold %t.exe --print-cfg --print-only=main.* -o %t.out | FileCheck %s
+# RUN: llvm-bolt %t.exe --print-cfg --print-only=main.* -o %t.out | FileCheck %s
 
 ## Check that llvm-bolt correctly parses PLT header created by mold linker.
-## Without the '-use-mold' option, "BOLT-ERROR: unable to disassemble instruction in PLT section .plt at offset 0x10" will be reported.
 ## The only call instruction in main() should be a call to printf() in PLT.
 CHECK:  callq "printf$plt

>From beaa547d8e65baa6d81f1bbc9916b766b713f99e Mon Sep 17 00:00:00 2001
From: patphzhang <patphzhang at tencent.com>
Date: Mon, 11 Nov 2024 14:05:47 +0800
Subject: [PATCH 4/7] [BOLT] parse the PLT in two parts: header detection and
 entry detection

---
 bolt/include/bolt/Core/MCPlusBuilder.h   |  5 ---
 bolt/lib/Rewrite/RewriteInstance.cpp     | 40 ++++++++++--------------
 bolt/lib/Target/X86/X86MCPlusBuilder.cpp | 23 --------------
 3 files changed, 16 insertions(+), 52 deletions(-)

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index 2cc94c52f802de..32eda0b283b883 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -1495,11 +1495,6 @@ class MCPlusBuilder {
     return 0;
   }
 
-  virtual bool isMoldPLTHeader(std::vector<MCInst *> &Insns) const {
-    llvm_unreachable("not implemented");
-    return false;
-  }
-
   virtual bool analyzeVirtualMethodCall(InstructionIterator Begin,
                                         InstructionIterator End,
                                         std::vector<MCInst *> &MethodFetchInsns,
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 5d4118e51d2f1c..806edb970dab0f 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -1707,7 +1707,21 @@ void RewriteInstance::disassemblePLTSectionX86(BinarySection &Section,
   const uint64_t SectionAddress = Section.getAddress();
   const uint64_t SectionSize = Section.getSize();
 
-  for (uint64_t EntryOffset = 0; EntryOffset + EntrySize <= SectionSize;
+  // Parse the PLT header
+  uint64_t HeaderSize = 16;
+  MCInst FirstInstr;
+  uint64_t FirstInstrSize;
+  disassemblePLTInstruction(Section, 0, FirstInstr, FirstInstrSize);
+  if (BC->MIB->isTerminateBranch(FirstInstr)) {
+    // The mold linker (https://github.com/rui314/mold/blob/v2.34.1/src/arch-x86-64.cc#L50)
+    // generates a unique format for the PLT. The header entry is 32 bytes long, while the 
+    // remaining entries are 16 bytes long.
+    BC->outs() << "BOLT-INFO: parsing PLT header for mold\n";
+    HeaderSize = 32;
+  }
+
+  // Parse the PLT entries
+  for (uint64_t EntryOffset = HeaderSize; EntryOffset + EntrySize <= SectionSize;
        EntryOffset += EntrySize) {
     MCInst Instruction;
     uint64_t InstrSize, InstrOffset = EntryOffset;
@@ -1724,30 +1738,8 @@ void RewriteInstance::disassemblePLTSectionX86(BinarySection &Section,
       InstrOffset += InstrSize;
     }
 
-    if (InstrOffset + InstrSize > EntryOffset + EntrySize) {
-      // Check if it is a mold header before rolling back because the mold linker generates
-      // a unique format. The header entry of the mold-style PLT is 32 bytes long, while the
-      // remaining entries are 16 bytes long. We need to skip the header entry.
-      uint64_t HeaderOffset = 0, MoldHeaderSize = 32;
-      if (EntryOffset == HeaderOffset && SectionSize >= MoldHeaderSize) {
-        std::vector<MCInst *> Insns;
-        MCInst Instructions[32]; // 32 insns at most
-        uint32_t Index = 0;
-        while (HeaderOffset < MoldHeaderSize) {
-          disassemblePLTInstruction(Section, HeaderOffset, Instructions[Index], InstrSize);
-          Insns.push_back(&Instructions[Index]);
-          HeaderOffset += InstrSize;
-          Index++;
-        }
-        // if it is a mold header, skip it
-        if (BC->MIB->isMoldPLTHeader(Insns)) {
-          BC->outs() << "BOLT-INFO: parsing the PLT of the mold linker\n";
-          EntryOffset += EntrySize;
-        }
-          
-      }
+    if (InstrOffset + InstrSize > EntryOffset + EntrySize)
       continue;
-    }
 
     uint64_t TargetAddress;
     if (!BC->MIB->evaluateMemOperandTarget(Instruction, TargetAddress,
diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
index 215380085deb01..63086c06d74fd9 100644
--- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
+++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
@@ -2127,29 +2127,6 @@ class X86MCPlusBuilder : public MCPlusBuilder {
     return Type;
   }
 
-  /// Analyze a series of insns that match the PLT header of the mold linker
-  /// (https://github.com/rui314/mold/blob/v2.34.1/src/arch-x86-64.cc#L50).
-  /// The size of the header is 32 bytes and the format is as follows:
-  ///   endbr64
-  ///   push %r11
-  ///   push GOTPLT+8(%rip)
-  ///   jmp *GOTPLT+16(%rip)
-  ///   padding (14 bytes)
-  ///
-  bool isMoldPLTHeader(std::vector<MCInst *> &Insns) const override {
-    if (Insns.size() != 18)
-      return false;
-      
-    if (!isTerminateBranch(*Insns[0]) || !isPush(*Insns[1])
-        || !isPush(*Insns[2]) || !isIndirectBranch(*Insns[3]))
-      return false;
-      
-    for (unsigned int i = 4; i < 18; ++i)
-      if (Insns[i]->getOpcode() != X86::INT3)
-        return false;
-    return true;
-  }
-
   /// Analyze a callsite to see if it could be a virtual method call.  This only
   /// checks to see if the overall pattern is satisfied, it does not guarantee
   /// that the callsite is a true virtual method call.

>From e42256e0cf011d528e15e4d51332d1b269a8f05b Mon Sep 17 00:00:00 2001
From: patphzhang <patphzhang at tencent.com>
Date: Mon, 11 Nov 2024 15:01:44 +0800
Subject: [PATCH 5/7] [BOLT] skip the '.plt.sec' section when parsing mold
 header

---
 bolt/lib/Rewrite/RewriteInstance.cpp | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 806edb970dab0f..3c4631f0c1451e 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -1708,17 +1708,19 @@ void RewriteInstance::disassemblePLTSectionX86(BinarySection &Section,
   const uint64_t SectionSize = Section.getSize();
 
   // Parse the PLT header
-  uint64_t HeaderSize = 16;
-  MCInst FirstInstr;
-  uint64_t FirstInstrSize;
-  disassemblePLTInstruction(Section, 0, FirstInstr, FirstInstrSize);
-  if (BC->MIB->isTerminateBranch(FirstInstr)) {
-    // The mold linker (https://github.com/rui314/mold/blob/v2.34.1/src/arch-x86-64.cc#L50)
-    // generates a unique format for the PLT. The header entry is 32 bytes long, while the 
-    // remaining entries are 16 bytes long.
-    BC->outs() << "BOLT-INFO: parsing PLT header for mold\n";
-    HeaderSize = 32;
-  }
+  uint64_t HeaderSize = 0;
+  if (Section.getName() != ".plt.sec") {
+    MCInst FirstInstr;
+    uint64_t FirstInstrSize;
+    disassemblePLTInstruction(Section, 0, FirstInstr, FirstInstrSize);
+    if (BC->MIB->isTerminateBranch(FirstInstr)) {
+      // The mold linker (https://github.com/rui314/mold/blob/v2.34.1/src/arch-x86-64.cc#L50)
+      // generates a unique format for the PLT. The header entry is 32 bytes long, while the 
+      // remaining entries are 16 bytes long.
+      BC->outs() << "BOLT-INFO: parsing PLT header for mold\n";
+      HeaderSize = 32;
+    }
+  }  
 
   // Parse the PLT entries
   for (uint64_t EntryOffset = HeaderSize; EntryOffset + EntrySize <= SectionSize;

>From 7133d21a5c4f7d32002a3d0a02f626ba6296fbcf Mon Sep 17 00:00:00 2001
From: patphzhang <patphzhang at tencent.com>
Date: Mon, 11 Nov 2024 15:14:51 +0800
Subject: [PATCH 6/7] [BOLT] choose the '.plt' section when parsing mold header

---
 bolt/lib/Rewrite/RewriteInstance.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 3c4631f0c1451e..1333851fa21945 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -1709,7 +1709,7 @@ void RewriteInstance::disassemblePLTSectionX86(BinarySection &Section,
 
   // Parse the PLT header
   uint64_t HeaderSize = 0;
-  if (Section.getName() != ".plt.sec") {
+  if (Section.getName() == ".plt") {
     MCInst FirstInstr;
     uint64_t FirstInstrSize;
     disassemblePLTInstruction(Section, 0, FirstInstr, FirstInstrSize);

>From f2f238e128bcd7ce1f4b2f5c19dc3967a0090a55 Mon Sep 17 00:00:00 2001
From: patphzhang <patphzhang at tencent.com>
Date: Fri, 15 Nov 2024 14:50:43 +0800
Subject: [PATCH 7/7] [BOLT] support different PLT header type in disassembling

---
 bolt/include/bolt/Core/MCPlusBuilder.h      |  7 +++++
 bolt/include/bolt/Rewrite/RewriteInstance.h |  3 +++
 bolt/lib/Rewrite/RewriteInstance.cpp        | 29 ++++++++++++---------
 bolt/lib/Target/X86/X86MCPlusBuilder.cpp    | 28 ++++++++++++++++++++
 4 files changed, 54 insertions(+), 13 deletions(-)

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index 32eda0b283b883..6a619b33aaf48c 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -1495,6 +1495,13 @@ class MCPlusBuilder {
     return 0;
   }
 
+  /// Analyze preamble instructions in PLT section and try to determine
+  /// the size of the header.
+  virtual uint32_t analyzePLTHeader(std::vector<MCInst *> &Insns) const {
+    llvm_unreachable("not implemented");
+    return 0;
+  }
+
   virtual bool analyzeVirtualMethodCall(InstructionIterator Begin,
                                         InstructionIterator End,
                                         std::vector<MCInst *> &MethodFetchInsns,
diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
index e5b7ad63007cab..54708da2bdf41a 100644
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -277,6 +277,9 @@ class RewriteInstance {
   /// is the expected .plt \p Section entry function size.
   void disassemblePLTSectionX86(BinarySection &Section, uint64_t EntrySize);
 
+  /// Disassemble the X86-specific .plt \p Section header and get header size.
+  uint32_t disassemblePLTHeaderX86(BinarySection &Section, uint64_t EntrySize);
+
   /// Disassemble riscv-specific .plt \p Section auxiliary function
   void disassemblePLTSectionRISCV(BinarySection &Section);
 
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 1333851fa21945..ca3f86f72d1ddf 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -1708,19 +1708,7 @@ void RewriteInstance::disassemblePLTSectionX86(BinarySection &Section,
   const uint64_t SectionSize = Section.getSize();
 
   // Parse the PLT header
-  uint64_t HeaderSize = 0;
-  if (Section.getName() == ".plt") {
-    MCInst FirstInstr;
-    uint64_t FirstInstrSize;
-    disassemblePLTInstruction(Section, 0, FirstInstr, FirstInstrSize);
-    if (BC->MIB->isTerminateBranch(FirstInstr)) {
-      // The mold linker (https://github.com/rui314/mold/blob/v2.34.1/src/arch-x86-64.cc#L50)
-      // generates a unique format for the PLT. The header entry is 32 bytes long, while the 
-      // remaining entries are 16 bytes long.
-      BC->outs() << "BOLT-INFO: parsing PLT header for mold\n";
-      HeaderSize = 32;
-    }
-  }  
+  uint64_t HeaderSize = disassemblePLTHeaderX86(Section, EntrySize);
 
   // Parse the PLT entries
   for (uint64_t EntryOffset = HeaderSize; EntryOffset + EntrySize <= SectionSize;
@@ -1757,6 +1745,21 @@ void RewriteInstance::disassemblePLTSectionX86(BinarySection &Section,
   }
 }
 
+uint32_t RewriteInstance::disassemblePLTHeaderX86(BinarySection &Section,
+                                               uint64_t EntrySize) {
+  uint64_t InstrSize, InstrOffset = 0;
+  std::vector<MCInst *> Insns;
+  MCInst Instructions[32]; // 32 insns (bytes) at most
+  uint32_t Index = 0;
+  while (InstrOffset < EntrySize) {
+    disassemblePLTInstruction(Section, InstrOffset, Instructions[Index], InstrSize);
+    Insns.push_back(&Instructions[Index]);
+    InstrOffset += InstrSize;
+    Index++;
+  }
+  return BC->MIB->analyzePLTHeader(Insns);
+}
+
 void RewriteInstance::disassemblePLT() {
   auto analyzeOnePLTSection = [&](BinarySection &Section, uint64_t EntrySize) {
     if (BC->isAArch64())
diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
index 63086c06d74fd9..2acff1c018c0c7 100644
--- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
+++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
@@ -2127,6 +2127,34 @@ class X86MCPlusBuilder : public MCPlusBuilder {
     return Type;
   }
 
+  uint32_t analyzePLTHeader(std::vector<MCInst *> &Insns) const override {
+    uint32_t HeaderSize = 0;
+    if (Insns.size() == 0) // empty header
+      return HeaderSize;
+    if (isTerminateBranch(*Insns[0])) {
+      // starting with an endbr, possible headers: mold
+      if (Insns.size() >= 4 && isPush(*Insns[1]) && isPush(*Insns[2]) &&
+          isIndirectBranch(*Insns[3])) {
+        // The mold linker (https://github.com/rui314/mold/blob/v2.34.1/src/arch-x86-64.cc#L50)
+        // generates a unique format for the PLT. The size of the header is 32 bytes and the 
+        // format is as follows:
+        ///   endbr64
+        ///   push %r11
+        ///   push GOTPLT+8(%rip)
+        ///   jmp *GOTPLT+16(%rip)
+        ///   padding (14 bytes)
+        HeaderSize = 32; // mold with CET support
+      } else {
+        // In case other linkers have new proposals.
+      }
+    } else {
+      // TODO: headers with endbr in the middle, including the lld version of
+      // the PLT, or headers without CET support, including R_386_PLT32,
+      // R_X86_64_PLT32, retpolineplt of lld (for Spectre v2 mitigation), etc.
+    }
+    return HeaderSize;
+  }
+
   /// Analyze a callsite to see if it could be a virtual method call.  This only
   /// checks to see if the overall pattern is satisfied, it does not guarantee
   /// that the callsite is a true virtual method call.



More information about the llvm-commits mailing list