[lld] [RISCV][LLD] Zcmt RISC-V extension in lld (PR #183450)

via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 3 23:59:33 PST 2026


https://github.com/LukeZhuang updated https://github.com/llvm/llvm-project/pull/183450

>From a1feb6b2f9c1653b7a8e3c9e571c76718eef538b Mon Sep 17 00:00:00 2001
From: Zhi Zhuang <zhuangzhi.zz at alibaba-inc.com>
Date: Tue, 7 Oct 2025 20:25:01 +0200
Subject: [PATCH] [RISCV][LLD] Zcmt RISC-V extension in lld (#183450)

This patch implements the Zcmt table jump optimization in LLD, which can
save a notable amount of code size, especially in embedded executables.

For more details about the Zcmt extension, see the RISC-V ISA manual:
https://riscv.github.io/riscv-isa-manual/snapshot/unprivileged/#insns-tablejump

Before linker relaxation, we scan the call/jump relocations in each section and
record, for each destination symbol, the number of bytes that could be saved.
We then finalize the table by keeping only the most valuable candidates (the
table size is rather limited). The linker relaxation loop uses this table to
relax calls/jumps whose destination symbol is among the selected candidates.
Finally, we create a new TableJump section and write the table to it.

This is a continuation of PRs #77884 and #163142.

Co-authored-by: Craig Topper <craig.topper at sifive.com>
Co-authored-by: VincentWu <43398706+Xinlong-Wu at users.noreply.github.com>
Co-authored-by: Scott Egerton <9487234+ScottEgerton at users.noreply.github.com>
Co-authored-by: Robin Kastberg <Robin.Kastberg at iar.com>
Co-authored-by: Fangrui Song <i at maskray.me>
---
 lld/ELF/Arch/RISCV.cpp                 | 278 ++++++++++++++++++++++++-
 lld/ELF/Config.h                       |   2 +
 lld/ELF/Driver.cpp                     |   1 +
 lld/ELF/Options.td                     |   5 +
 lld/test/ELF/riscv-no-tbljal-call.s    |  34 +++
 lld/test/ELF/riscv-tbljal-call.s       |  52 +++++
 lld/test/ELF/riscv-tbljal-many-jumps.s |  57 +++++
 lld/test/ELF/riscv-tbljal-syms.s       |  42 ++++
 8 files changed, 465 insertions(+), 6 deletions(-)
 create mode 100644 lld/test/ELF/riscv-no-tbljal-call.s
 create mode 100644 lld/test/ELF/riscv-tbljal-call.s
 create mode 100644 lld/test/ELF/riscv-tbljal-many-jumps.s
 create mode 100644 lld/test/ELF/riscv-tbljal-syms.s

diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp
index 85f49c9260565..0ee226a79a996 100644
--- a/lld/ELF/Arch/RISCV.cpp
+++ b/lld/ELF/Arch/RISCV.cpp
@@ -9,6 +9,7 @@
 #include "InputFiles.h"
 #include "OutputSections.h"
 #include "RelocScan.h"
+#include "SymbolTable.h"
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
@@ -32,6 +33,7 @@ class RISCV final : public TargetInfo {
 public:
   RISCV(Ctx &);
   uint32_t calcEFlags() const override;
+  void initTargetSpecificSections() override;
   int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
   void writeGotHeader(uint8_t *buf) const override;
   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
@@ -66,6 +68,33 @@ class RISCV final : public TargetInfo {
   SmallVector<std::pair<uint64_t, uint64_t>, 0> synthesizedAligns;
 };
 
+struct TableJumpEntry {
+  int saved; // Accumulated code size reduction (in bytes) for this symbol.
+  int index; // Index within its candidate list; -1 until it is selected.
+};
+
+// Used by RISC-V Zcmt table jump relaxation.
+class TableJumpSection final : public SyntheticSection {
+public:
+  TableJumpSection(Ctx &);
+  size_t getSize() const override;
+  void writeTo(uint8_t *buf) override;
+
+  void finalizeContents() override;
+  int getCMJTEntryIndex(const Symbol *sym) const;
+  int getCMJALTEntryIndex(const Symbol *sym) const;
+  void addEntry(const Symbol *sym, int saved, bool isCMJT);
+
+private:
+  static constexpr size_t maxCMJTEntrySize = 32;
+  static constexpr size_t maxCMJALTEntrySize = 224;
+  static constexpr size_t startCMJALTEntryIdx = 32;
+
+  // Candidate maps: symbol -> (total code size reduction, table index).
+  llvm::DenseMap<const Symbol *, TableJumpEntry> cmjtCandidates;
+  llvm::DenseMap<const Symbol *, TableJumpEntry> cmjaltCandidates;
+};
+
 } // end anonymous namespace
 
 // These are internal relocation numbers for GP/X0 relaxation. They aren't part
@@ -74,6 +103,7 @@ class RISCV final : public TargetInfo {
 #define INTERNAL_R_RISCV_GPREL_S 257
 #define INTERNAL_R_RISCV_X0REL_I 258
 #define INTERNAL_R_RISCV_X0REL_S 259
+#define INTERNAL_R_RISCV_TBJAL 260
 
 const uint64_t dtpOffset = 0x800;
 
@@ -189,6 +219,18 @@ uint32_t RISCV::calcEFlags() const {
   return target;
 }
 
+void RISCV::initTargetSpecificSections() {
+  if (ctx.arg.relaxTbljal) {
+    ctx.in.riscvTableJump = std::make_unique<TableJumpSection>(ctx);
+    ctx.inputSections.push_back(ctx.in.riscvTableJump.get());
+
+    Symbol *s = ctx.symtab->addSymbol(Defined{
+        ctx, /*file=*/ctx.internalFile, "__jvt_base$", STB_GLOBAL, STV_DEFAULT,
+        STT_NOTYPE, /*value=*/0, /*size=*/0, ctx.in.riscvTableJump.get()});
+    s->isUsedInRegularObj = true;
+  }
+}
+
 int64_t RISCV::getImplicitAddend(const uint8_t *buf, RelType type) const {
   switch (type) {
   default:
@@ -541,6 +583,9 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
     return;
   }
 
+  case INTERNAL_R_RISCV_TBJAL:
+    return;
+
   case R_RISCV_ADD8:
     *loc += val;
     return;
@@ -795,16 +840,44 @@ void elf::initSymbolAnchors(Ctx &ctx) {
   }
 }
 
+static bool relaxTableJump(Ctx &ctx, const InputSection &sec, size_t i,
+                           uint64_t loc, Relocation &r, uint32_t &remove) {
+  if (!ctx.in.riscvTableJump)
+    return false;
+
+  uint32_t insn = read32le(sec.contentMaybeDecompress().data() + r.offset +
+                           (r.type == R_RISCV_JAL ? 0 : 4));
+  uint8_t rd = extractBits(insn, 11, 7);
+  int tblEntryIndex = -1;
+  auto &tableJump = static_cast<TableJumpSection &>(*ctx.in.riscvTableJump);
+  if (rd == X_X0)
+    tblEntryIndex = tableJump.getCMJTEntryIndex(r.sym);
+  else if (rd == X_RA)
+    tblEntryIndex = tableJump.getCMJALTEntryIndex(r.sym);
+
+  if (tblEntryIndex < 0)
+    return false;
+  sec.relaxAux->relocTypes[i] = INTERNAL_R_RISCV_TBJAL;
+  sec.relaxAux->writes.push_back(0xA002 |
+                                 (tblEntryIndex << 2)); // cm.jt or cm.jalt
+  remove = r.type == R_RISCV_JAL ? 2 : 6;
+  return true;
+}
+
+static int64_t getCallDisplace(Ctx &ctx, const Relocation &r, uint64_t loc) {
+  const Symbol &sym = *r.sym;
+  const uint64_t dest =
+      (r.expr == R_PLT_PC ? sym.getPltVA(ctx) : sym.getVA(ctx)) + r.addend;
+  return dest - loc;
+}
+
 // Relax R_RISCV_CALL/R_RISCV_CALL_PLT auipc+jalr to c.j, c.jal, or jal.
 static void relaxCall(Ctx &ctx, const InputSection &sec, size_t i, uint64_t loc,
                       Relocation &r, uint32_t &remove) {
   const bool rvc = getEFlags(ctx, sec.file) & EF_RISCV_RVC;
-  const Symbol &sym = *r.sym;
   const uint64_t insnPair = read64le(sec.content().data() + r.offset);
   const uint32_t rd = extractBits(insnPair, 32 + 11, 32 + 7);
-  const uint64_t dest =
-      (r.expr == R_PLT_PC ? sym.getPltVA(ctx) : sym.getVA(ctx)) + r.addend;
-  const int64_t displace = dest - loc;
+  const int64_t displace = getCallDisplace(ctx, r, loc);
 
   // When the caller specifies the old value of `remove`, disallow its
   // increment.
@@ -817,6 +890,9 @@ static void relaxCall(Ctx &ctx, const InputSection &sec, size_t i, uint64_t loc,
     sec.relaxAux->relocTypes[i] = R_RISCV_RVC_JUMP;
     sec.relaxAux->writes.push_back(0x2001); // c.jal
     remove = 6;
+  } else if (remove >= (r.type == R_RISCV_JAL ? 2 : 6) &&
+             relaxTableJump(ctx, sec, i, loc, r, remove)) {
+    // relaxTableJump sets remove
   } else if (remove >= 4 && isInt<21>(displace)) {
     sec.relaxAux->relocTypes[i] = R_RISCV_JAL;
     sec.relaxAux->writes.push_back(0x6f | rd << 7); // jal
@@ -940,6 +1016,10 @@ static bool relax(Ctx &ctx, int pass, InputSection &sec) {
         relaxCall(ctx, sec, i, loc, r, remove);
       }
       break;
+    case R_RISCV_JAL:
+      if (relaxable(relocs, i))
+        relaxTableJump(ctx, sec, i, loc, r, remove);
+      break;
     case R_RISCV_TPREL_HI20:
     case R_RISCV_TPREL_ADD:
     case R_RISCV_TPREL_LO12_I:
@@ -999,6 +1079,46 @@ static bool relax(Ctx &ctx, int pass, InputSection &sec) {
   return changed;
 }
 
+static void scanTableJumpEntries(Ctx &ctx, const InputSection &sec) {
+  for (auto [i, r] : llvm::enumerate(sec.relocations)) {
+    if (!r.sym->isDefined())
+      continue;
+    if (i + 1 == sec.relocs().size() ||
+        sec.relocs()[i + 1].type != R_RISCV_RELAX)
+      continue;
+    switch (r.type) {
+    case R_RISCV_JAL:
+    case R_RISCV_CALL:
+    case R_RISCV_CALL_PLT: {
+      uint32_t insn = read32le(sec.contentMaybeDecompress().data() + r.offset +
+                               (r.type == R_RISCV_JAL ? 0 : 4));
+      uint8_t rd = extractBits(insn, 11, 7);
+
+      // Skip the jal/j which can be potentially relaxed to c.jal/c.j. No need
+      // to waste the limited jumptable entries on them.
+      const uint64_t loc = sec.getVA() + r.offset;
+      const int64_t displace = getCallDisplace(ctx, r, loc);
+      const bool rvc = getEFlags(ctx, sec.file) & EF_RISCV_RVC;
+      if (rvc && isInt<12>(displace)) {
+        if ((rd == X_X0) || (rd == X_RA && !ctx.arg.is64))
+          continue;
+      }
+
+      // If the jal/j can be relaxed to a 32-bit instruction, the saving becomes
+      // actually 2 bytes (4->2), otherwise it's 6 bytes (8->2)
+      int saved = isInt<21>(displace) ? 2 : 6;
+
+      auto &tableJump = static_cast<TableJumpSection &>(*ctx.in.riscvTableJump);
+      if (rd == X_X0)
+        tableJump.addEntry(r.sym, saved, /*isCMJT=*/true);
+      else if (rd == X_RA)
+        tableJump.addEntry(r.sym, saved, /*isCMJT=*/false);
+      break;
+    }
+    }
+  }
+}
+
 // When relaxing just R_RISCV_ALIGN, relocDeltas is usually changed only once in
 // the absence of a linker script. For call and load/store R_RISCV_RELAX, code
 // shrinkage may reduce displacement and make more relocations eligible for
@@ -1008,10 +1128,23 @@ static bool relax(Ctx &ctx, int pass, InputSection &sec) {
 // relaxation pass.
 bool RISCV::relaxOnce(int pass) const {
   llvm::TimeTraceScope timeScope("RISC-V relaxOnce");
-  if (pass == 0)
+  SmallVector<InputSection *, 0> storage;
+  if (pass == 0) {
     initSymbolAnchors(ctx);
 
-  SmallVector<InputSection *, 0> storage;
+    // Only do scan/finalize once before all relaxations
+    if (ctx.in.riscvTableJump) {
+      // scan all relocations and build the Zcmt jump table
+      for (OutputSection *osec : ctx.outputSections) {
+        if (!(osec->flags & SHF_EXECINSTR))
+          continue;
+        for (InputSection *sec : getInputSections(*osec, storage))
+          scanTableJumpEntries(ctx, *sec);
+      }
+      ctx.in.riscvTableJump->finalizeContents();
+    }
+  }
+
   bool changed = false;
   for (OutputSection *osec : ctx.outputSections) {
     if (!(osec->flags & SHF_EXECINSTR))
@@ -1194,6 +1327,14 @@ void RISCV::finalizeRelax(int passes) const {
           case INTERNAL_R_RISCV_X0REL_I:
           case INTERNAL_R_RISCV_X0REL_S:
             break;
+          case INTERNAL_R_RISCV_TBJAL:
+            assert(ctx.arg.relaxTbljal &&
+                   "TBJAL relocation without --relax-tbljal");
+            assert((aux.writes[writesIdx] & 0xfc03) == 0xA002 &&
+                   "malformed cm.jt/cm.jalt encoding");
+            skip = 2;
+            write16le(p, aux.writes[writesIdx++]);
+            break;
           case R_RISCV_RELAX:
             // Used by relaxTlsLe to indicate the relocation is ignored.
             break;
@@ -1532,3 +1673,128 @@ void elf::mergeRISCVAttributesSections(Ctx &ctx) {
 }
 
 void elf::setRISCVTargetInfo(Ctx &ctx) { ctx.target.reset(new RISCV(ctx)); }
+
+TableJumpSection::TableJumpSection(Ctx &ctx)
+    : SyntheticSection(ctx, ".riscv.jvt", SHT_PROGBITS, SHF_ALLOC,
+                       /*alignment=*/64) {}
+
+int TableJumpSection::getCMJTEntryIndex(const Symbol *sym) const {
+  auto it = cmjtCandidates.find(sym);
+  return it != cmjtCandidates.end() ? it->second.index : -1;
+}
+
+int TableJumpSection::getCMJALTEntryIndex(const Symbol *sym) const {
+  auto it = cmjaltCandidates.find(sym);
+  return it != cmjaltCandidates.end() ? startCMJALTEntryIdx + it->second.index
+                                      : -1;
+}
+
+void TableJumpSection::addEntry(const Symbol *sym, int saved, bool isCMJT) {
+  auto &candidates = isCMJT ? cmjtCandidates : cmjaltCandidates;
+  if (!candidates.count(sym))
+    candidates[sym] = TableJumpEntry{/*saved=*/0, /*index=*/-1};
+  candidates[sym].saved += saved;
+}
+
+// Keep the (at most maxSize) most profitable candidates whose saving covers a
+// table entry, assign them indexes in that order, and erase all others.
+static void
+selectEntries(Ctx &ctx,
+              llvm::DenseMap<const Symbol *, TableJumpEntry> &candidates,
+              uint32_t maxSize) {
+  SmallVector<std::pair<const Symbol *, TableJumpEntry>, 0> entries(
+      candidates.begin(), candidates.end());
+  llvm::sort(entries, [](const auto &a, const auto &b) {
+    return a.second.saved > b.second.saved;
+  });
+  if (entries.size() > maxSize)
+    entries.resize(maxSize);
+  // Drop entries that don't save enough to cover the table entry cost.
+  while (!entries.empty() &&
+         entries.back().second.saved < (int)ctx.arg.wordsize)
+    entries.pop_back();
+
+  // Fill the index back to the map
+  for (auto [i, entry] : llvm::enumerate(entries))
+    candidates[entry.first].index = (int)i;
+
+  // Remove the candidates whose index == -1, which means not selected
+  // (llvm::erase_if does not seem to work on DenseMap)
+  for (auto it = candidates.begin(), end = candidates.end(); it != end;) {
+    if (it->second.index >= 0)
+      ++it;
+    else
+      candidates.erase(it++);
+  }
+}
+
+void TableJumpSection::finalizeContents() {
+  // Clean up the candidates by removing non-interesting ones and picking
+  // the most profitable ones (since the list size is quite limited).
+  selectEntries(ctx, cmjtCandidates, maxCMJTEntrySize);
+  selectEntries(ctx, cmjaltCandidates, maxCMJALTEntrySize);
+
+  // We have three choices here: (1) adopt both cm.jt and cm.jalt, (2) adopt
+  // only cm.jt, or (3) give up. (cm.jalt only is covered by #1, since the
+  // table size is the same.)
+  // Each net saving below starts at minus the table size in bytes; the
+  // per-entry savings are then added on top.
+
+  int savedBoth =
+      (startCMJALTEntryIdx + cmjaltCandidates.size()) * ctx.arg.wordsize;
+  int savedCMJTOnly = cmjtCandidates.size() * ctx.arg.wordsize;
+  savedBoth = -savedBoth;
+  savedCMJTOnly = -savedCMJTOnly;
+
+  for (auto &[sym, entry] : cmjtCandidates) {
+    savedCMJTOnly += entry.saved;
+    savedBoth += entry.saved;
+  }
+  for (auto &[sym, entry] : cmjaltCandidates)
+    savedBoth += entry.saved;
+
+  // Using cm.jalt requires padding the cm.jt region to 32 entries.
+  // Drop cm.jalt if the padding cost exceeds cm.jalt's benefit.
+  if (!cmjaltCandidates.empty() && savedBoth < savedCMJTOnly)
+    cmjaltCandidates.clear();
+
+  // Give up entirely unless the option actually chosen shrinks the output.
+  if ((cmjaltCandidates.empty() ? savedCMJTOnly : savedBoth) <= 0) {
+    Log(ctx) << "table jump relaxation didn't reduce code size";
+    cmjtCandidates.clear();
+    cmjaltCandidates.clear();
+  }
+}
+
+size_t TableJumpSection::getSize() const {
+  if (!cmjaltCandidates.empty())
+    return (startCMJALTEntryIdx + cmjaltCandidates.size()) * ctx.arg.wordsize;
+  return cmjtCandidates.size() * ctx.arg.wordsize;
+}
+
+static void
+writeEntries(Ctx &ctx, uint8_t *buf,
+             const llvm::DenseMap<const Symbol *, TableJumpEntry> &candidates) {
+  // Order the candidates by their indexes
+  SmallVector<std::pair<const Symbol *, TableJumpEntry>, 0> entries(
+      candidates.begin(), candidates.end());
+  llvm::sort(entries, [](const auto &a, const auto &b) {
+    return a.second.index < b.second.index;
+  });
+  for (auto &[sym, entry] : entries) {
+    uint64_t va = sym->getVA(ctx);
+    if (ctx.arg.is64)
+      write64le(buf, va);
+    else
+      write32le(buf, va);
+    buf += ctx.arg.wordsize;
+  }
+}
+
+void TableJumpSection::writeTo(uint8_t *buf) {
+  if (!cmjtCandidates.empty())
+    writeEntries(ctx, buf, cmjtCandidates);
+  if (!cmjaltCandidates.empty())
+    writeEntries(ctx, buf + startCMJALTEntryIdx * ctx.arg.wordsize,
+                 cmjaltCandidates);
+}
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index a9f74460f6f99..abc6b19daa116 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -367,6 +367,7 @@ struct Config {
   bool resolveGroups;
   bool relrGlibc = false;
   bool relrPackDynRelocs = false;
+  bool relaxTbljal;
   llvm::DenseSet<llvm::StringRef> saveTempsArgs;
   llvm::SmallVector<std::pair<llvm::GlobPattern, uint32_t>, 0> shuffleSections;
   bool singleRoRx;
@@ -574,6 +575,7 @@ struct InStruct {
   std::unique_ptr<RelroPaddingSection> relroPadding;
   std::unique_ptr<SyntheticSection> armCmseSGSection;
   std::unique_ptr<PPC64LongBranchTargetSection> ppc64LongBranchTarget;
+  std::unique_ptr<SyntheticSection> riscvTableJump;
   std::unique_ptr<SyntheticSection> mipsAbiFlags;
   std::unique_ptr<MipsGotSection> mipsGot;
   std::unique_ptr<SyntheticSection> mipsOptions;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index d7bfa7357d4ed..72743bbffa3ee 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1529,6 +1529,7 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
   ctx.arg.rejectMismatch = !args.hasArg(OPT_no_warn_mismatch);
   ctx.arg.relax = args.hasFlag(OPT_relax, OPT_no_relax, true);
   ctx.arg.relaxGP = args.hasFlag(OPT_relax_gp, OPT_no_relax_gp, false);
+  ctx.arg.relaxTbljal = args.hasArg(OPT_relax_tbljal);
   ctx.arg.rpath = getRpath(args);
   ctx.arg.relocatable = args.hasArg(OPT_relocatable);
   ctx.arg.resolveGroups =
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index c2111e58c12b9..c7a0b86d59f2b 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -423,6 +423,11 @@ defm relax_gp: BB<"relax-gp",
   "Enable global pointer relaxation",
   "Disable global pointer relaxation (default)">;
 
+def relax_tbljal : FF<"relax-tbljal">,
+                   HelpText<"Enable conversion of call instructions to table "
+                            "jump instruction from the Zcmt extension for "
+                            "frequently called functions (RISC-V only)">;
+
 defm remap_inputs: EEq<"remap-inputs",
   "Remap input files matching <from-glob> to <to-file>">,
   MetaVarName<"<from-glob>=<to-file>">;
diff --git a/lld/test/ELF/riscv-no-tbljal-call.s b/lld/test/ELF/riscv-no-tbljal-call.s
new file mode 100644
index 0000000000000..8b3a41d463972
--- /dev/null
+++ b/lld/test/ELF/riscv-no-tbljal-call.s
@@ -0,0 +1,34 @@
+# REQUIRES: riscv
+
+## When there are too few calls, table jump relaxation should not be profitable.
+## Verify the .riscv.jvt section has zero size and no cm.jt/cm.jalt are emitted.
+
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+relax,+zcmt %s -o %t.rv32.o
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax,+zcmt %s -o %t.rv64.o
+# RUN: ld.lld %t.rv32.o --relax-tbljal --defsym foo=0x150000 -o %t.rv32
+# RUN: ld.lld %t.rv64.o --relax-tbljal --defsym foo=0x150000 -o %t.rv64
+# RUN: llvm-readelf -S %t.rv32 | FileCheck --check-prefix=SEC32 %s
+# RUN: llvm-readelf -S %t.rv64 | FileCheck --check-prefix=SEC64 %s
+# RUN: llvm-objdump -d --mattr=+zcmt --no-show-raw-insn %t.rv32 | FileCheck --check-prefix=DISASM %s
+# RUN: llvm-objdump -d --mattr=+zcmt --no-show-raw-insn %t.rv64 | FileCheck --check-prefix=DISASM %s
+
+# SEC32: .riscv.jvt PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000000
+# SEC64: .riscv.jvt PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000000
+
+# DISASM-NOT: cm.jt
+# DISASM-NOT: cm.jalt
+
+.global _start
+.p2align 3
+_start:
+  call foo
+  tail foo_1
+  tail foo_2
+  tail foo_3
+
+foo_1:
+  nop
+foo_2:
+  nop
+foo_3:
+  nop
diff --git a/lld/test/ELF/riscv-tbljal-call.s b/lld/test/ELF/riscv-tbljal-call.s
new file mode 100644
index 0000000000000..23f510cbb7228
--- /dev/null
+++ b/lld/test/ELF/riscv-tbljal-call.s
@@ -0,0 +1,52 @@
+# REQUIRES: riscv
+
+## Test that call/tail instructions are relaxed to cm.jt/cm.jalt when
+## --relax-tbljal is enabled and the table jump is profitable.
+
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+relax,+zcmt %s -o %t.rv32.o
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax,+zcmt %s -o %t.rv64.o
+# RUN: ld.lld %t.rv32.o --relax-tbljal --defsym foo=0x150000 --defsym foo_1=0x150010 --defsym foo_3=0x150030 -o %t.rv32
+# RUN: ld.lld %t.rv64.o --relax-tbljal --defsym foo=0x150000 --defsym foo_1=0x150010 --defsym foo_3=0x150030 -o %t.rv64
+
+## Check disassembly for cm.jalt (rd=ra) and cm.jt (rd=zero).
+# RUN: llvm-objdump -d -M no-aliases --mattr=+zcmt --no-show-raw-insn %t.rv32 | FileCheck --check-prefix=RV32 %s
+# RUN: llvm-objdump -d -M no-aliases --mattr=+zcmt --no-show-raw-insn %t.rv64 | FileCheck --check-prefix=RV64 %s
+
+## Check jump table contents.
+# RUN: llvm-readelf -x .riscv.jvt %t.rv32 | FileCheck --check-prefix=JVT32 %s
+# RUN: llvm-readelf -x .riscv.jvt %t.rv64 | FileCheck --check-prefix=JVT64 %s
+
+## 21 calls to foo become cm.jalt (RV32), tails become cm.jt.
+# RV32-COUNT-21: cm.jalt
+# RV32:         cm.jt
+# RV32:         cm.jt
+# RV32:         cm.jt
+# RV32:         cm.jt
+
+# RV64:         cm.jt
+# RV64:         cm.jt
+# RV64:         cm.jt
+
+## Verify table entries contain the target addresses (little-endian).
+# JVT32: 30001500 10001500 00001500
+# JVT64: 30001500 00000000 10001500 00000000
+
+.global _start
+.p2align 3
+_start:
+  .rept 21
+  call foo
+  .endr
+  tail foo
+  tail foo_1
+  tail foo_1
+  tail foo_1
+  tail foo_3
+  tail foo_2
+  tail foo_3
+  tail foo_3
+  tail foo_3
+  tail foo_3
+
+foo_2:
+  nop
diff --git a/lld/test/ELF/riscv-tbljal-many-jumps.s b/lld/test/ELF/riscv-tbljal-many-jumps.s
new file mode 100644
index 0000000000000..33f9042326eb0
--- /dev/null
+++ b/lld/test/ELF/riscv-tbljal-many-jumps.s
@@ -0,0 +1,57 @@
+# REQUIRES: riscv
+
+## Test table jump with many targets filling the cm.jt table (32 entries).
+## Verify the .riscv.jvt section size accounts for all entries.
+
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+relax,+zcmt -asm-macro-max-nesting-depth=33 %s -o %t.rv32.o
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax,+zcmt -asm-macro-max-nesting-depth=33 %s -o %t.rv64.o
+
+## Use --defsym for targets beyond c.j range so they can only be relaxed via table jump.
+# RUN: ld.lld %t.rv32.o --relax-tbljal \
+# RUN:   --defsym t0=0x100000 --defsym t1=0x100010 --defsym t2=0x100020 --defsym t3=0x100030 \
+# RUN:   --defsym t4=0x100040 --defsym t5=0x100050 --defsym t6=0x100060 --defsym t7=0x100070 \
+# RUN:   --defsym t8=0x100080 --defsym t9=0x100090 --defsym t10=0x1000a0 --defsym t11=0x1000b0 \
+# RUN:   --defsym t12=0x1000c0 --defsym t13=0x1000d0 --defsym t14=0x1000e0 --defsym t15=0x1000f0 \
+# RUN:   --defsym t16=0x100100 --defsym t17=0x100110 --defsym t18=0x100120 --defsym t19=0x100130 \
+# RUN:   --defsym t20=0x100140 --defsym t21=0x100150 --defsym t22=0x100160 --defsym t23=0x100170 \
+# RUN:   --defsym t24=0x100180 --defsym t25=0x100190 --defsym t26=0x1001a0 --defsym t27=0x1001b0 \
+# RUN:   --defsym t28=0x1001c0 --defsym t29=0x1001d0 --defsym t30=0x1001e0 --defsym t31=0x1001f0 \
+# RUN:   -o %t.rv32
+# RUN: ld.lld %t.rv64.o --relax-tbljal \
+# RUN:   --defsym t0=0x100000 --defsym t1=0x100010 --defsym t2=0x100020 --defsym t3=0x100030 \
+# RUN:   --defsym t4=0x100040 --defsym t5=0x100050 --defsym t6=0x100060 --defsym t7=0x100070 \
+# RUN:   --defsym t8=0x100080 --defsym t9=0x100090 --defsym t10=0x1000a0 --defsym t11=0x1000b0 \
+# RUN:   --defsym t12=0x1000c0 --defsym t13=0x1000d0 --defsym t14=0x1000e0 --defsym t15=0x1000f0 \
+# RUN:   --defsym t16=0x100100 --defsym t17=0x100110 --defsym t18=0x100120 --defsym t19=0x100130 \
+# RUN:   --defsym t20=0x100140 --defsym t21=0x100150 --defsym t22=0x100160 --defsym t23=0x100170 \
+# RUN:   --defsym t24=0x100180 --defsym t25=0x100190 --defsym t26=0x1001a0 --defsym t27=0x1001b0 \
+# RUN:   --defsym t28=0x1001c0 --defsym t29=0x1001d0 --defsym t30=0x1001e0 --defsym t31=0x1001f0 \
+# RUN:   -o %t.rv64
+
+# RUN: llvm-readelf -S %t.rv32 | FileCheck --check-prefix=SEC32 %s
+# RUN: llvm-readelf -S %t.rv64 | FileCheck --check-prefix=SEC64 %s
+# RUN: llvm-objdump -d --mattr=+zcmt --no-show-raw-insn %t.rv32 | FileCheck --check-prefix=DISASM %s
+
+## 32 entries * 4 bytes = 0x80; 32 entries * 8 bytes = 0x100.
+# SEC32: .riscv.jvt PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000080
+# SEC64: .riscv.jvt PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000100
+
+## Verify some instructions were converted.
+# DISASM: cm.jt
+
+.global _start
+.p2align 3
+_start:
+## Use enough repetitions per target so that the savings (2 bytes per tail on
+## RV64 after jal relaxation) exceed the table entry cost (8 bytes on RV64).
+.altmacro
+.macro iota n, i=0
+.if \n-\i
+  .rept 6
+  tail t\i
+  .endr
+  iota \n, %(\i+1)
+.endif
+.endm
+
+iota 32
diff --git a/lld/test/ELF/riscv-tbljal-syms.s b/lld/test/ELF/riscv-tbljal-syms.s
new file mode 100644
index 0000000000000..dea036c59b421
--- /dev/null
+++ b/lld/test/ELF/riscv-tbljal-syms.s
@@ -0,0 +1,42 @@
+# REQUIRES: riscv
+
+## Check that relaxation correctly adjusts symbol addresses and sizes.
+
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+relax,+zcmt %s -o %t.rv32.o
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax,+zcmt %s -o %t.rv64.o
+# RUN: ld.lld -Ttext=0x100000 --relax-tbljal %t.rv32.o -o %t.rv32
+# RUN: ld.lld -Ttext=0x100000 --relax-tbljal %t.rv64.o -o %t.rv64
+
+# RUN: llvm-readelf -s %t.rv32 | FileCheck --check-prefix=CHECK32 %s
+# RUN: llvm-readelf -s %t.rv64 | FileCheck --check-prefix=CHECK64 %s
+
+# CHECK32:      00100000     4 NOTYPE  LOCAL  DEFAULT     1 a
+# CHECK32-NEXT: 00100000     6 NOTYPE  LOCAL  DEFAULT     1 b
+# CHECK32-NEXT: 00100000     0 NOTYPE  LOCAL  DEFAULT     1 $x
+# CHECK32-NEXT: 00100004     2 NOTYPE  LOCAL  DEFAULT     1 c
+# CHECK32-NEXT: 00100004     6 NOTYPE  LOCAL  DEFAULT     1 d
+# CHECK32-NEXT: 00100000    10 NOTYPE  GLOBAL DEFAULT     1 _start
+# CHECK32:                     NOTYPE  GLOBAL DEFAULT   {{.*}} __jvt_base$
+
+# CHECK64:      00100000     4 NOTYPE  LOCAL  DEFAULT     1 a
+# CHECK64-NEXT: 00100000     8 NOTYPE  LOCAL  DEFAULT     1 b
+# CHECK64-NEXT: 00100000     0 NOTYPE  LOCAL  DEFAULT     1 $x
+# CHECK64-NEXT: 00100004     4 NOTYPE  LOCAL  DEFAULT     1 c
+# CHECK64-NEXT: 00100004     8 NOTYPE  LOCAL  DEFAULT     1 d
+# CHECK64-NEXT: 00100000    12 NOTYPE  GLOBAL DEFAULT     1 _start
+# CHECK64:                     NOTYPE  GLOBAL DEFAULT   {{.*}} __jvt_base$
+
+.global _start
+_start:
+a:
+b:
+  add  a0, a1, a2
+.size a, . - a
+c:
+d:
+  call _start
+.size b, . - b
+.size c, . - c
+  add a0, a1, a2
+.size d, . - d
+.size _start, . - _start



More information about the llvm-commits mailing list