[lld] 7cd429f - [ELF] Add -z force-ibt and -z shstk for Intel Control-flow Enforcement Technology

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 13 23:40:43 PST 2020


Author: Fangrui Song
Date: 2020-01-13T23:39:28-08:00
New Revision: 7cd429f27d4886bb841ed0e3702e970f5f6cccd1

URL: https://github.com/llvm/llvm-project/commit/7cd429f27d4886bb841ed0e3702e970f5f6cccd1
DIFF: https://github.com/llvm/llvm-project/commit/7cd429f27d4886bb841ed0e3702e970f5f6cccd1.diff

LOG: [ELF] Add -z force-ibt and -z shstk for Intel Control-flow Enforcement Technology

This patch is a joint work by Rui Ueyama and me based on D58102 by Xiang Zhang.

It adds Intel CET (Control-flow Enforcement Technology) support to lld.
The implementation follows the draft version of psABI which you can
download from https://github.com/hjl-tools/x86-psABI/wiki/X86-psABI.

CET introduces a new restriction on indirect jump instructions so that
you can limit the places to which you can jump using indirect jumps.

In order to use the feature, you need to compile source files with
-fcf-protection=full.

* IBT is enabled if all input files are compiled with the flag. To force-enable IBT, pass -z force-ibt.
* SHSTK is enabled if all input files are compiled with the flag, or if -z shstk is specified.

IBT-enabled executables/shared objects have two PLT sections, ".plt" and
".plt.sec".  For the details as to why we have two sections, please read
the comments.

Reviewed By: xiangzhangllvm

Differential Revision: https://reviews.llvm.org/D59780

Added: 
    lld/test/ELF/i386-feature-cet.s
    lld/test/ELF/x86-64-feature-cet.s

Modified: 
    lld/ELF/Arch/X86.cpp
    lld/ELF/Arch/X86_64.cpp
    lld/ELF/Config.h
    lld/ELF/Driver.cpp
    lld/ELF/Options.td
    lld/ELF/SyntheticSections.cpp
    lld/ELF/SyntheticSections.h
    lld/ELF/Target.h
    lld/ELF/Writer.cpp
    lld/docs/ld.lld.1

Removed: 
    lld/test/ELF/i386-cet.s
    lld/test/ELF/x86-64-cet.s


################################################################################
diff  --git a/lld/ELF/Arch/X86.cpp b/lld/ELF/Arch/X86.cpp
index c85684b5acdb..b4daedc0f5dc 100644
--- a/lld/ELF/Arch/X86.cpp
+++ b/lld/ELF/Arch/X86.cpp
@@ -410,6 +410,71 @@ void X86::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const {
   memcpy(loc - 2, inst, sizeof(inst));
 }
 
+// If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
+// entries containing endbr32 instructions. A PLT entry will be split into two
+// parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
+namespace {
+class IntelIBT : public X86 {
+public:
+  IntelIBT();
+  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
+  void writePlt(uint8_t *buf, const Symbol &sym,
+                uint64_t pltEntryAddr) const override;
+  void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;
+
+  static const unsigned IBTPltHeaderSize = 16;
+};
+} // namespace
+
+IntelIBT::IntelIBT() { pltHeaderSize = 0; }
+
+void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
+  uint64_t va =
+      in.ibtPlt->getVA() + IBTPltHeaderSize + s.pltIndex * pltEntrySize;
+  write32le(buf, va);
+}
+
+void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
+                        uint64_t /*pltEntryAddr*/) const {
+  if (config->isPic) {
+    const uint8_t inst[] = {
+        0xf3, 0x0f, 0x1e, 0xfb,       // endbr32
+        0xff, 0xa3, 0,    0,    0, 0, // jmp *name@GOT(%ebx)
+        0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
+    };
+    memcpy(buf, inst, sizeof(inst));
+    write32le(buf + 6, sym.getGotPltVA() - in.gotPlt->getVA());
+    return;
+  }
+
+  const uint8_t inst[] = {
+      0xf3, 0x0f, 0x1e, 0xfb,       // endbr32
+      0xff, 0x25, 0,    0,    0, 0, // jmp *foo@GOT
+      0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
+  };
+  memcpy(buf, inst, sizeof(inst));
+  write32le(buf + 6, sym.getGotPltVA());
+}
+
+void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
+  writePltHeader(buf);
+  buf += IBTPltHeaderSize;
+
+  const uint8_t inst[] = {
+      0xf3, 0x0f, 0x1e, 0xfb,    // endbr32
+      0x68, 0,    0,    0,    0, // pushl $reloc_offset
+      0xe9, 0,    0,    0,    0, // jmpq .PLT0@PC
+      0x66, 0x90,                // nop
+  };
+
+  for (size_t i = 0; i < numEntries; ++i) {
+    memcpy(buf, inst, sizeof(inst));
+    write32le(buf + 5, i * sizeof(object::ELF32LE::Rel));
+    write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30);
+    buf += sizeof(inst);
+  }
+}
+
 namespace {
 class RetpolinePic : public X86 {
 public:
@@ -553,6 +618,11 @@ TargetInfo *getX86TargetInfo() {
     return &t;
   }
 
+  if (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) {
+    static IntelIBT t;
+    return &t;
+  }
+
   static X86 t;
   return &t;
 }

diff  --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp
index 19e29b62c612..74b72eb91293 100644
--- a/lld/ELF/Arch/X86_64.cpp
+++ b/lld/ELF/Arch/X86_64.cpp
@@ -151,7 +151,7 @@ void X86_64::writePltHeader(uint8_t *buf) const {
   };
   memcpy(buf, pltData, sizeof(pltData));
   uint64_t gotPlt = in.gotPlt->getVA();
-  uint64_t plt = in.plt->getVA();
+  uint64_t plt = in.ibtPlt ? in.ibtPlt->getVA() : in.plt->getVA();
   write32le(buf + 2, gotPlt - plt + 2); // GOTPLT+8
   write32le(buf + 8, gotPlt - plt + 4); // GOTPLT+16
 }
@@ -568,6 +568,60 @@ bool X86_64::adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end,
   return false;
 }
 
+// If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
+// entries containing endbr64 instructions. A PLT entry will be split into two
+// parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
+namespace {
+class IntelIBT : public X86_64 {
+public:
+  IntelIBT();
+  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
+  void writePlt(uint8_t *buf, const Symbol &sym,
+                uint64_t pltEntryAddr) const override;
+  void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;
+
+  static const unsigned IBTPltHeaderSize = 16;
+};
+} // namespace
+
+IntelIBT::IntelIBT() { pltHeaderSize = 0; }
+
+void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
+  uint64_t va =
+      in.ibtPlt->getVA() + IBTPltHeaderSize + s.pltIndex * pltEntrySize;
+  write64le(buf, va);
+}
+
+void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
+                        uint64_t pltEntryAddr) const {
+  const uint8_t Inst[] = {
+      0xf3, 0x0f, 0x1e, 0xfa,       // endbr64
+      0xff, 0x25, 0,    0,    0, 0, // jmpq *got(%rip)
+      0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
+  };
+  memcpy(buf, Inst, sizeof(Inst));
+  write32le(buf + 6, sym.getGotPltVA() - pltEntryAddr - 10);
+}
+
+void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
+  writePltHeader(buf);
+  buf += IBTPltHeaderSize;
+
+  const uint8_t inst[] = {
+      0xf3, 0x0f, 0x1e, 0xfa,    // endbr64
+      0x68, 0,    0,    0,    0, // pushq <relocation index>
+      0xe9, 0,    0,    0,    0, // jmpq plt[0]
+      0x66, 0x90,                // nop
+  };
+
+  for (size_t i = 0; i < numEntries; ++i) {
+    memcpy(buf, inst, sizeof(inst));
+    write32le(buf + 5, i);
+    write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30);
+    buf += sizeof(inst);
+  }
+}
+
 // These nonstandard PLT entries are to migtigate Spectre v2 security
 // vulnerability. In order to mitigate Spectre v2, we want to avoid indirect
 // branch instructions such as `jmp *GOTPLT(%rip)`. So, in the following PLT
@@ -695,6 +749,11 @@ static TargetInfo *getTargetInfo() {
     return &t;
   }
 
+  if (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) {
+    static IntelIBT t;
+    return &t;
+  }
+
   static X86_64 t;
   return &t;
 }

diff  --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index d7e715ebf357..06ba88a83dd4 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -154,7 +154,6 @@ struct Configuration {
   bool fixCortexA8;
   bool forceBTI;
   bool formatBinary = false;
-  bool requireCET;
   bool gcSections;
   bool gdbIndex;
   bool gnuHash = false;
@@ -202,6 +201,7 @@ struct Configuration {
   bool writeAddends;
   bool zCombreloc;
   bool zCopyreloc;
+  bool zForceIbt;
   bool zGlobal;
   bool zHazardplt;
   bool zIfuncNoplt;
@@ -215,6 +215,7 @@ struct Configuration {
   bool zOrigin;
   bool zRelro;
   bool zRodynamic;
+  bool zShstk;
   bool zText;
   bool zRetpolineplt;
   bool zWxneeded;

diff  --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 19598f52cd48..eadf06e8ef0a 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -346,8 +346,8 @@ static void checkOptions() {
       error("-execute-only and -no-rosegment cannot be used together");
   }
 
-  if (config->zRetpolineplt && config->requireCET)
-    error("--require-cet may not be used with -z retpolineplt");
+  if (config->zRetpolineplt && config->zForceIbt)
+    error("-z force-ibt may not be used with -z retpolineplt");
 
   if (config->emachine != EM_AARCH64) {
     if (config->pacPlt)
@@ -409,18 +409,18 @@ static GnuStackKind getZGnuStack(opt::InputArgList &args) {
 
 static bool isKnownZFlag(StringRef s) {
   return s == "combreloc" || s == "copyreloc" || s == "defs" ||
-         s == "execstack" || s == "force-bti" || s == "global" ||
-         s == "hazardplt" || s == "ifunc-noplt" || s == "initfirst" ||
-         s == "interpose" || s == "keep-text-section-prefix" || s == "lazy" ||
-         s == "muldefs" || s == "separate-code" ||
-         s == "separate-loadable-segments" || s == "nocombreloc" ||
-         s == "nocopyreloc" || s == "nodefaultlib" || s == "nodelete" ||
-         s == "nodlopen" || s == "noexecstack" || s == "nognustack" ||
-         s == "nokeep-text-section-prefix" || s == "norelro" ||
-         s == "noseparate-code" || s == "notext" || s == "now" ||
-         s == "origin" || s == "pac-plt" || s == "relro" ||
-         s == "retpolineplt" || s == "rodynamic" || s == "text" ||
-         s == "undefs" || s == "wxneeded" ||
+         s == "execstack" || s == "force-bti" || s == "force-ibt" ||
+         s == "global" || s == "hazardplt" || s == "ifunc-noplt" ||
+         s == "initfirst" || s == "interpose" ||
+         s == "keep-text-section-prefix" || s == "lazy" || s == "muldefs" ||
+         s == "separate-code" || s == "separate-loadable-segments" ||
+         s == "nocombreloc" || s == "nocopyreloc" || s == "nodefaultlib" ||
+         s == "nodelete" || s == "nodlopen" || s == "noexecstack" ||
+         s == "nognustack" || s == "nokeep-text-section-prefix" ||
+         s == "norelro" || s == "noseparate-code" || s == "notext" ||
+         s == "now" || s == "origin" || s == "pac-plt" || s == "relro" ||
+         s == "retpolineplt" || s == "rodynamic" || s == "shstk" ||
+         s == "text" || s == "undefs" || s == "wxneeded" ||
          s.startswith("common-page-size=") || s.startswith("max-page-size=") ||
          s.startswith("stack-size=");
 }
@@ -877,7 +877,6 @@ static void readConfigs(opt::InputArgList &args) {
   config->fixCortexA53Errata843419 = args.hasArg(OPT_fix_cortex_a53_843419);
   config->fixCortexA8 = args.hasArg(OPT_fix_cortex_a8);
   config->forceBTI = hasZOption(args, "force-bti");
-  config->requireCET = args.hasArg(OPT_require_cet);
   config->gcSections = args.hasFlag(OPT_gc_sections, OPT_no_gc_sections, false);
   config->gnuUnique = args.hasFlag(OPT_gnu_unique, OPT_no_gnu_unique, true);
   config->gdbIndex = args.hasFlag(OPT_gdb_index, OPT_no_gdb_index, false);
@@ -966,6 +965,7 @@ static void readConfigs(opt::InputArgList &args) {
       args.hasFlag(OPT_warn_symbol_ordering, OPT_no_warn_symbol_ordering, true);
   config->zCombreloc = getZFlag(args, "combreloc", "nocombreloc", true);
   config->zCopyreloc = getZFlag(args, "copyreloc", "nocopyreloc", true);
+  config->zForceIbt = hasZOption(args, "force-ibt");
   config->zGlobal = hasZOption(args, "global");
   config->zGnustack = getZGnuStack(args);
   config->zHazardplt = hasZOption(args, "hazardplt");
@@ -983,6 +983,7 @@ static void readConfigs(opt::InputArgList &args) {
   config->zRetpolineplt = hasZOption(args, "retpolineplt");
   config->zRodynamic = hasZOption(args, "rodynamic");
   config->zSeparate = getZSeparate(args);
+  config->zShstk = hasZOption(args, "shstk");
   config->zStackSize = args::getZOptionValue(args, OPT_z, "stack-size", 0);
   config->zText = getZFlag(args, "text", "notext", true);
   config->zWxneeded = hasZOption(args, "wxneeded");
@@ -1687,12 +1688,8 @@ static void wrapSymbols(ArrayRef<WrappedSymbol> wrapped) {
 // with CET. We enable the feature only when all object files are compatible
 // with CET.
 //
-// This function returns the merged feature flags. If 0, we cannot enable CET.
 // This is also the case with AARCH64's BTI and PAC which use the similar
 // GNU_PROPERTY_AARCH64_FEATURE_1_AND mechanism.
-//
-// Note that the CET-aware PLT is not implemented yet. We do error
-// check only.
 template <class ELFT> static uint32_t getAndFeatures() {
   if (config->emachine != EM_386 && config->emachine != EM_X86_64 &&
       config->emachine != EM_AARCH64)
@@ -1704,8 +1701,12 @@ template <class ELFT> static uint32_t getAndFeatures() {
     if (config->forceBTI && !(features & GNU_PROPERTY_AARCH64_FEATURE_1_BTI)) {
       warn(toString(f) + ": -z force-bti: file does not have BTI property");
       features |= GNU_PROPERTY_AARCH64_FEATURE_1_BTI;
-    } else if (!features && config->requireCET)
-      error(toString(f) + ": --require-cet: file is not compatible with CET");
+    } else if (config->zForceIbt &&
+               !(features & GNU_PROPERTY_X86_FEATURE_1_IBT)) {
+      warn(toString(f) + ": -z force-ibt: file does not have "
+                         "GNU_PROPERTY_X86_FEATURE_1_IBT property");
+      features |= GNU_PROPERTY_X86_FEATURE_1_IBT;
+    }
     ret &= features;
   }
 
@@ -1713,6 +1714,9 @@ template <class ELFT> static uint32_t getAndFeatures() {
   // this does not require support in the object for correctness.
   if (config->pacPlt)
     ret |= GNU_PROPERTY_AARCH64_FEATURE_1_PAC;
+  // Force enable Shadow Stack.
+  if (config->zShstk)
+    ret |= GNU_PROPERTY_X86_FEATURE_1_SHSTK;
 
   return ret;
 }

diff  --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index b86a670915c1..e3f4fa53a723 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -176,9 +176,8 @@ def fix_cortex_a53_843419: F<"fix-cortex-a53-843419">,
 def fix_cortex_a8: F<"fix-cortex-a8">,
   HelpText<"Apply fixes for ARM Cortex-A8 erratum 657417">;
 
-// This option is intentionally hidden from the user as the implementation
-// is not complete.
-def require_cet: F<"require-cet">;
+def force_bti: F<"force-bti">,
+  HelpText<"Force enable AArch64 BTI in PLT, warn if Input ELF file does not have GNU_PROPERTY_AARCH64_FEATURE_1_BTI property">;
 
 defm format: Eq<"format", "Change the input format of the inputs following this option">,
   MetaVarName<"[default,elf,binary]">;

diff  --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index fa581e4802a7..550a5b38b89b 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -2442,16 +2442,21 @@ void HashTableSection::writeTo(uint8_t *buf) {
   }
 }
 
-// On PowerPC64 the lazy symbol resolvers go into the `global linkage table`
-// in the .glink section, rather then the typical .plt section.
 PltSection::PltSection()
     : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, ".plt"),
       headerSize(target->pltHeaderSize) {
+  // On PowerPC, this section contains lazy symbol resolvers.
   if (config->emachine == EM_PPC || config->emachine == EM_PPC64) {
     name = ".glink";
     alignment = 4;
   }
 
+  // On x86 when IBT is enabled, this section contains the second PLT (lazy
+  // symbol resolvers).
+  if ((config->emachine == EM_386 || config->emachine == EM_X86_64) &&
+      (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT))
+    name = ".plt.sec";
+
   // The PLT needs to be writable on SPARC as the dynamic linker will
   // modify the instructions in the PLT entries.
   if (config->emachine == EM_SPARCV9)
@@ -2535,6 +2540,76 @@ void IpltSection::addSymbols() {
   }
 }
 
+// This is an x86-only extra PLT section and used only when a security
+// enhancement feature called CET is enabled. In this comment, I'll explain what
+// the feature is and why we have two PLT sections if CET is enabled.
+//
+// So, what does CET do? CET introduces a new restriction to indirect jump
+// instructions. CET works this way. Assume that CET is enabled. Then, if you
+// execute an indirect jump instruction, the processor verifies that a special
+// "landing pad" instruction (which is actually a repurposed NOP instruction and
+// now called "endbr32" or "endbr64") is at the jump target. If the jump target
+// does not start with that instruction, the processor raises an exception
+// instead of continuing executing code.
+//
+// If CET is enabled, the compiler emits endbr to all locations where indirect
+// jumps may jump to.
+//
+// This mechanism makes it extremely hard to transfer control to the middle of
+// a function that is not supposed to be an indirect jump target, preventing
+// certain types of attacks such as ROP or JOP.
+//
+// Note that the processors in the market as of 2019 don't actually support the
+// feature. Only the spec is available at the moment.
+//
+// Now, I'll explain why we have this extra PLT section for CET.
+//
+// Since you can indirectly jump to a PLT entry, we have to make PLT entries
+// start with endbr. The problem is there's no extra space for endbr (which is 4
+// bytes long), as the PLT entry is only 16 bytes long and all bytes are already
+// used.
+//
+// In order to deal with the issue, we split a PLT entry into two PLT entries.
+// Remember that each PLT entry contains code to jump to an address read from
+// .got.plt AND code to resolve a dynamic symbol lazily. With the 2-PLT scheme,
+// the former code is written to .plt.sec, and the latter code is written to
+// .plt.
+//
+// Lazy symbol resolution in the 2-PLT scheme works in the usual way, except
+// that the regular .plt is now called .plt.sec and .plt is repurposed to
+// contain only code for lazy symbol resolution.
+//
+// In other words, this is how the 2-PLT scheme works. Application code is
+// supposed to jump to .plt.sec to call an external function. Each .plt.sec
+// entry contains code to read an address from a corresponding .got.plt entry
+// and jump to that address. Addresses in .got.plt initially point to .plt, so
+// when an application calls an external function for the first time, the
+// control is transferred to a function that resolves a symbol name from
+// external shared object files. That function then rewrites a .got.plt entry
+// with a resolved address, so that the subsequent function calls directly jump
+// to a desired location from .plt.sec.
+//
+// There is an open question as to whether the 2-PLT scheme was desirable or
+// not. We could have simply extended the PLT entry size to 32-bytes to
+// accommodate endbr, and that scheme would have been much simpler than the
+// 2-PLT scheme. One reason to split PLT was, by doing that, we could keep hot
+// code (.plt.sec) from cold code (.plt). But as far as I know no one proved
+// that the optimization actually makes a difference.
+//
+// That said, the 2-PLT scheme is a part of the ABI, debuggers and other tools
+// depend on it, so we implement the ABI.
+IBTPltSection::IBTPltSection()
+    : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, ".plt") {}
+
+void IBTPltSection::writeTo(uint8_t *buf) {
+  target->writeIBTPlt(buf, in.plt->getNumEntries());
+}
+
+size_t IBTPltSection::getSize() const {
+  // 16 is the header size of .plt.
+  return 16 + in.plt->getNumEntries() * target->pltEntrySize;
+}
+
 // The string hash function for .gdb_index.
 static uint32_t computeGdbHash(StringRef s) {
   uint32_t h = 0;

diff  --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index ccc3f0a32ce5..f0a598dda51d 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -665,6 +665,14 @@ class HashTableSection final : public SyntheticSection {
 // Used for PLT entries. It usually has a PLT header for lazy binding. Each PLT
 // entry is associated with a JUMP_SLOT relocation, which may be resolved lazily
 // at runtime.
+//
+// On PowerPC, this section contains lazy symbol resolvers. A branch instruction
+// jumps to a PLT call stub, which will then jump to the target (BIND_NOW) or a
+// lazy symbol resolver.
+//
+// On x86 when IBT is enabled, this section (.plt.sec) contains PLT call stubs.
+// A call instruction jumps to a .plt.sec entry, which will then jump to the
+// target (BIND_NOW) or a .plt entry.
 class PltSection : public SyntheticSection {
 public:
   PltSection();
@@ -673,8 +681,9 @@ class PltSection : public SyntheticSection {
   bool isNeeded() const override;
   void addSymbols();
   void addEntry(Symbol &sym);
+  size_t getNumEntries() const { return entries.size(); }
 
-  size_t headerSize;
+  size_t headerSize = 0;
 
 private:
   std::vector<const Symbol *> entries;
@@ -696,6 +705,14 @@ class IpltSection final : public SyntheticSection {
   void addEntry(Symbol &sym);
 };
 
+// This is x86-only.
+class IBTPltSection : public SyntheticSection {
+public:
+  IBTPltSection();
+  void writeTo(uint8_t *Buf) override;
+  size_t getSize() const override;
+};
+
 class GdbIndexSection final : public SyntheticSection {
 public:
   struct AddressEntry {
@@ -1178,6 +1195,7 @@ struct InStruct {
   PltSection *plt;
   IpltSection *iplt;
   PPC32Got2Section *ppc32Got2;
+  IBTPltSection *ibtPlt;
   RelocationBaseSection *relaPlt;
   RelocationBaseSection *relaIplt;
   StringTableSection *shStrTab;

diff  --git a/lld/ELF/Target.h b/lld/ELF/Target.h
index 72067366b875..949a7bfdf64b 100644
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -48,6 +48,7 @@ class TargetInfo {
     // All but PPC32 and PPC64 use the same format for .plt and .iplt entries.
     writePlt(buf, sym, pltEntryAddr);
   }
+  virtual void writeIBTPlt(uint8_t *buf, size_t numEntries) const {}
   virtual void addPltHeaderSymbols(InputSection &isec) const {}
   virtual void addPltSymbols(InputSection &isec, uint64_t off) const {}
 

diff  --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index c20b74821d9c..6373044d8804 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -515,6 +515,12 @@ template <class ELFT> void createSyntheticSections() {
       /*sort=*/false);
   add(in.relaIplt);
 
+  if ((config->emachine == EM_386 || config->emachine == EM_X86_64) &&
+      (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT)) {
+    in.ibtPlt = make<IBTPltSection>();
+    add(in.ibtPlt);
+  }
+
   in.plt = make<PltSection>();
   add(in.plt);
   in.iplt = make<IpltSection>();

diff  --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1
index dce1e6d556fe..92d67b17e24e 100644
--- a/lld/docs/ld.lld.1
+++ b/lld/docs/ld.lld.1
@@ -605,6 +605,10 @@ segment.
 .It Cm force-bti
 Force enable AArch64 BTI instruction in PLT, warn if Input ELF file does not have GNU_PROPERTY_AARCH64_FEATURE_1_BTI property.
 .Pp
+.It Cm force-ibt
+Force enable Intel Indirect Branch Tracking in PLT, warn if an input ELF file
+does not have GNU_PROPERTY_X86_FEATURE_1_IBT property.
+.Pp
 .It Cm global
 Sets the
 .Dv DF_1_GLOBAL flag in the
@@ -719,6 +723,9 @@ allows overlap between two executable segments, or two non-executable segments.
 .Cm separate-loadable-segments
 disallows overlap.
 .Pp
+.It Cm shstk
+x86 only, use shadow stack.
+.Pp
 .It Cm stack-size Ns = Ns Ar size
 Set the main thread's stack size to
 .Ar size .

diff  --git a/lld/test/ELF/i386-cet.s b/lld/test/ELF/i386-cet.s
deleted file mode 100644
index 125c7977ee28..000000000000
--- a/lld/test/ELF/i386-cet.s
+++ /dev/null
@@ -1,47 +0,0 @@
-# REQUIRES: x86
-# RUN: llvm-mc -filetype=obj -triple=i386-unknown-linux %s -o %t.o
-# RUN: llvm-mc -filetype=obj -triple=i386-unknown-linux %p/Inputs/i386-cet1.s -o %t1.o
-# RUN: llvm-mc -filetype=obj -triple=i386-unknown-linux %p/Inputs/i386-cet2.s -o %t2.o
-# RUN: llvm-mc -filetype=obj -triple=i386-unknown-linux %p/Inputs/i386-cet3.s -o %t3.o
-# RUN: llvm-mc -filetype=obj -triple=i386-unknown-linux %p/Inputs/i386-cet4.s -o %t4.o
-
-# RUN: ld.lld -e func1 %t.o %t1.o -o %t
-# RUN: llvm-readelf -n %t | FileCheck -check-prefix=CET -match-full-lines %s
-
-# RUN: ld.lld -e func1 %t.o %t2.o -o %t
-# RUN: llvm-readelf -n %t | FileCheck -check-prefix=CET -match-full-lines %s
-
-# CET: Properties: x86 feature: IBT, SHSTK
-
-# RUN: ld.lld -e func1 %t.o %t3.o -o %t
-# RUN: llvm-readelf -S %t | FileCheck -check-prefix=NOCET %s
-
-# NOCET:     Section Headers
-# NOCET-NOT: .note.gnu.property
-
-# RUN: not ld.lld -e func1 %t.o %t3.o -o %t --require-cet 2>&1 \
-# RUN:   | FileCheck -check-prefix=ERROR %s
-# ERROR: i386-cet.s.tmp3.o: --require-cet: file is not compatible with CET
-
-# RUN: ld.lld -e func1 %t.o %t4.o -o %t
-# RUN: llvm-readelf -n %t | FileCheck -check-prefix=NOSHSTK -match-full-lines %s
-
-# Check .note.gnu.protery without property SHSTK.
-# NOSHSTK: Properties: x86 feature: IBT
-
-.section ".note.gnu.property", "a"
-.long 4
-.long 0xc
-.long 0x5
-.asciz "GNU"
-
-.long 0xc0000002
-.long 4
-.long 3
-
-.text
-.globl func1
-.type func1, at function
-func1:
-  call func2
-  ret

diff  --git a/lld/test/ELF/i386-feature-cet.s b/lld/test/ELF/i386-feature-cet.s
new file mode 100644
index 000000000000..1c59ffabd432
--- /dev/null
+++ b/lld/test/ELF/i386-feature-cet.s
@@ -0,0 +1,93 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=i386 %s -o %t.o
+# RUN: llvm-mc -filetype=obj -triple=i386 %p/Inputs/i386-cet1.s -o %t1.o
+# RUN: llvm-mc -filetype=obj -triple=i386 %p/Inputs/i386-cet2.s -o %t2.o
+# RUN: llvm-mc -filetype=obj -triple=i386 %p/Inputs/i386-cet3.s -o %t3.o
+# RUN: llvm-mc -filetype=obj -triple=i386 %p/Inputs/i386-cet4.s -o %t4.o
+
+# RUN: ld.lld -e func1 %t.o %t1.o -o %t
+# RUN: llvm-readelf -n %t | FileCheck --check-prefix=CET --match-full-lines %s
+
+# RUN: ld.lld -e func1 %t.o %t2.o -o %t
+# RUN: llvm-readelf -n %t | FileCheck --check-prefix=CET --match-full-lines %s
+
+# CET: Properties: x86 feature: IBT, SHSTK
+
+# RUN: ld.lld -e func1 %t.o %t3.o -o %t
+# RUN: llvm-readelf -S %t | FileCheck --check-prefix=NOCET %s
+
+# NOCET:     Section Headers
+# NOCET-NOT: .note.gnu.property
+
+# RUN: ld.lld -e func1 %t.o %t3.o -o %t -z force-ibt 2>&1 \
+# RUN:   | FileCheck --check-prefix=WARN %s
+# WARN: {{.*}}.o: -z force-ibt: file does not have GNU_PROPERTY_X86_FEATURE_1_IBT property
+
+# RUN: ld.lld -e func1 %t.o %t4.o -o %t
+# RUN: llvm-readelf -n %t | FileCheck --check-prefix=NOSHSTK %s
+
+# Check .note.gnu.property without property SHSTK.
+# NOSHSTK: Properties: x86 feature: IBT{{$}}
+
+# RUN: ld.lld -shared %t1.o -soname=so -o %t1.so
+# RUN: ld.lld -e func1 %t.o %t1.so -o %t
+# RUN: llvm-readelf -n %t | FileCheck --check-prefix=CET --match-full-lines %s
+# RUN: llvm-readelf -x .got.plt %t | FileCheck --check-prefix=GOTPLT %s
+# RUN: llvm-objdump -d --no-show-raw-insn --print-imm-hex %t | FileCheck --check-prefix=DISASM %s
+
+# GOTPLT:      Hex dump of section '.got.plt':
+# GOTPLT-NEXT: 0x004032d0 50224000 00000000 00000000 20124000
+# GOTPLT-NEXT: 0x004032e0 0b124000
+
+# DISASM:      Disassembly of section .text:
+# DISASM:      00401200 func1:
+# DISASM-NEXT: 401200:       calll   0x2b <func2+0x401230>
+# DISASM-NEXT: 401205:       calll   0x36 <ifunc>
+# DISASM-NEXT:               retl
+
+# DISASM:      Disassembly of section .plt:
+# DISASM:      00401210 .plt:
+# DISASM-NEXT: 401210:       pushl   0x4032d4
+# DISASM-NEXT:               jmpl    *0x4032d8
+# DISASM-NEXT:               nop
+# DISASM-NEXT:               nop
+# DISASM-NEXT:               nop
+# DISASM-NEXT:               nop
+# DISASM-NEXT:               endbr32
+# DISASM-NEXT:               pushl   $0x0
+# DISASM-NEXT:               jmp     -0x1e <.plt>
+# DISASM-NEXT:               nop
+
+# DISASM:      Disassembly of section .plt.sec:
+# DISASM:      00401230 .plt.sec:
+# DISASM-NEXT: 401230:       endbr32
+# DISASM-NEXT:               jmpl    *0x4032dc
+# DISASM-NEXT:               nopw    (%eax,%eax)
+
+# DISASM:      Disassembly of section .iplt:
+# DISASM:      00401240 ifunc:
+# DISASM-NEXT: 401240:       endbr32
+# DISASM-NEXT:               jmpl    *0x4032e0
+# DISASM-NEXT:               nopw    (%eax,%eax)
+
+.section ".note.gnu.property", "a"
+.long 4
+.long 0xc
+.long 0x5
+.asciz "GNU"
+
+.long 0xc0000002 # GNU_PROPERTY_X86_FEATURE_1_AND
+.long 4
+.long 3          # GNU_PROPERTY_X86_FEATURE_1_IBT and SHSTK
+
+.text
+.globl func1
+.type func1, @function
+func1:
+  call func2
+  call ifunc
+  ret
+
+.type ifunc, @gnu_indirect_function
+ifunc:
+  ret

diff  --git a/lld/test/ELF/x86-64-cet.s b/lld/test/ELF/x86-64-cet.s
deleted file mode 100644
index 3fb3716ed4e9..000000000000
--- a/lld/test/ELF/x86-64-cet.s
+++ /dev/null
@@ -1,48 +0,0 @@
-# REQUIRES: x86
-# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o
-# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/x86-64-cet1.s -o %t1.o
-# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/x86-64-cet2.s -o %t2.o
-# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/x86-64-cet3.s -o %t3.o
-# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/x86-64-cet4.s -o %t4.o
-
-# RUN: ld.lld -e func1 %t.o %t1.o -o %t
-# RUN: llvm-readelf -n %t | FileCheck -check-prefix=CET -match-full-lines %s
-
-# RUN: ld.lld -e func1 %t.o %t2.o -o %t
-# RUN: llvm-readelf -n %t | FileCheck -check-prefix=CET -match-full-lines %s
-
-# CET: Properties: x86 feature: IBT, SHSTK
-
-# RUN: ld.lld -e func1 %t.o %t3.o -o %t
-# RUN: llvm-readelf -S %t | FileCheck -check-prefix=NOCET %s
-
-# NOCET:     Section Headers
-# NOCET-NOT: .note.gnu.property
-
-# RUN: not ld.lld -e func1 %t.o %t3.o -o %t --require-cet 2>&1 \
-# RUN:   | FileCheck -check-prefix=ERROR %s
-# ERROR: x86-64-cet.s.tmp3.o: --require-cet: file is not compatible with CET
-
-# RUN: ld.lld -e func1 %t.o %t4.o -o %t
-# RUN: llvm-readelf -n %t | FileCheck -check-prefix=NOSHSTK -match-full-lines %s
-
-# Check .note.gnu.protery without property SHSTK.
-# NOSHSTK: Properties: x86 feature: IBT
-
-.section ".note.gnu.property", "a"
-.long 4
-.long 0x10
-.long 0x5
-.asciz "GNU"
-
-.long 0xc0000002
-.long 4
-.long 3
-.long 0
-
-.text
-.globl func1
-.type func1, @function
-func1:
-  call func2
-  ret

diff  --git a/lld/test/ELF/x86-64-feature-cet.s b/lld/test/ELF/x86-64-feature-cet.s
new file mode 100644
index 000000000000..9899bec1e084
--- /dev/null
+++ b/lld/test/ELF/x86-64-feature-cet.s
@@ -0,0 +1,92 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %p/Inputs/x86-64-cet1.s -o %t1.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %p/Inputs/x86-64-cet2.s -o %t2.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %p/Inputs/x86-64-cet3.s -o %t3.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %p/Inputs/x86-64-cet4.s -o %t4.o
+
+# RUN: ld.lld -e func1 %t.o %t1.o -o %t
+# RUN: llvm-readelf -n %t | FileCheck --check-prefix=CET --match-full-lines %s
+
+# RUN: ld.lld -e func1 %t.o %t2.o -o %t
+# RUN: llvm-readelf -n %t | FileCheck --check-prefix=CET --match-full-lines %s
+
+# CET: Properties: x86 feature: IBT, SHSTK
+
+# RUN: ld.lld -e func1 %t.o %t3.o -o %t
+# RUN: llvm-readelf -S %t | FileCheck --check-prefix=NOCET %s
+
+# NOCET:     Section Headers
+# NOCET-NOT: .note.gnu.property
+
+# RUN: ld.lld -e func1 %t.o %t3.o -o %t -z force-ibt 2>&1 \
+# RUN:   | FileCheck --check-prefix=WARN %s
+# WARN: {{.*}}.o: -z force-ibt: file does not have GNU_PROPERTY_X86_FEATURE_1_IBT property
+
+# RUN: ld.lld -e func1 %t.o %t4.o -o %t
+# RUN: llvm-readelf -n %t | FileCheck --check-prefix=NOSHSTK %s
+
+# Check .note.gnu.property without property SHSTK.
+# NOSHSTK: Properties: x86 feature: IBT{{$}}
+
+# RUN: ld.lld -shared %t1.o -soname=so -o %t1.so
+# RUN: ld.lld -e func1 %t.o %t1.so -o %t
+# RUN: llvm-readelf -n %t | FileCheck --check-prefix=CET --match-full-lines %s
+# RUN: llvm-readelf -x .got.plt %t | FileCheck --check-prefix=GOTPLT %s
+# RUN: llvm-objdump -d --no-show-raw-insn --print-imm-hex %t | FileCheck --check-prefix=DISASM %s
+
+# GOTPLT:      Hex dump of section '.got.plt':
+# GOTPLT-NEXT: 203480 80232000 00000000 00000000 00000000
+# GOTPLT-NEXT: 203490 00000000 00000000 50132000 00000000
+# GOTPLT-NEXT: 2034a0 00000000 00000000
+
+# DISASM:      Disassembly of section .text:
+# DISASM:      0000000000201330 func1:
+# DISASM-NEXT: 201330:       callq   0x2b <func2+0x201360>
+# DISASM-NEXT: 201335:       callq   0x36 <func2+0x201370>
+# DISASM-NEXT:               retq
+
+# DISASM:      Disassembly of section .plt:
+# DISASM:      0000000000201340 .plt:
+# DISASM-NEXT: 201340:       pushq   0x2142(%rip)
+# DISASM-NEXT:               jmpq    *0x2144(%rip)
+# DISASM-NEXT:               nopl    (%rax)
+# DISASM-NEXT:               endbr64
+# DISASM-NEXT:               pushq   $0x0
+# DISASM-NEXT:               jmp     -0x1e <.plt>
+# DISASM-NEXT:               nop
+
+# DISASM:      Disassembly of section .plt.sec:
+# DISASM:      0000000000201360 .plt.sec:
+# DISASM-NEXT: 201360:       endbr64
+# DISASM-NEXT:               jmpq    *0x212e(%rip)
+# DISASM-NEXT:               nopw    (%rax,%rax)
+
+# DISASM:      Disassembly of section .iplt:
+# DISASM:      0000000000201370 .iplt:
+# DISASM-NEXT: 201370:       endbr64
+# DISASM-NEXT:               jmpq    *0x2126(%rip)
+# DISASM-NEXT:               nopw    (%rax,%rax)
+
+.section ".note.gnu.property", "a"
+.long 4
+.long 0x10
+.long 0x5
+.asciz "GNU"
+
+.long 0xc0000002 # GNU_PROPERTY_X86_FEATURE_1_AND
+.long 4
+.long 3          # GNU_PROPERTY_X86_FEATURE_1_IBT and SHSTK
+.long 0
+
+.text
+.globl func1
+.type func1, @function
+func1:
+  call func2
+  call ifunc
+  ret
+
+.type ifunc, @gnu_indirect_function
+ifunc:
+  ret


        


More information about the llvm-commits mailing list