[lld] [RISCV][LLD] Zcmt RISC-V extension in lld (PR #183450)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 3 23:59:33 PST 2026
https://github.com/LukeZhuang updated https://github.com/llvm/llvm-project/pull/183450
>From a1feb6b2f9c1653b7a8e3c9e571c76718eef538b Mon Sep 17 00:00:00 2001
From: Zhi Zhuang <zhuangzhi.zz at alibaba-inc.com>
Date: Tue, 7 Oct 2025 20:25:01 +0200
Subject: [PATCH] [RISCV][LLD] Zcmt RISC-V extension in lld (#183450)
This patch implements the optimization for the Zcmt extension in LLD,
which can save a notable amount of code size, especially in embedded executables.
For more details about the Zcmt extension, see the RISC-V ISA manual here:
https://riscv.github.io/riscv-isa-manual/snapshot/unprivileged/#insns-tablejump
We scan and collect the call/jump relocation types in each section before linker
relaxation, and record each symbol, together with the number of bytes that relaxing
its calls would save, into a table.
We then finalize the table by choosing the most valuable candidates (since the
table size is rather limited), and use it during the linker relaxation loop to
relax calls/jumps whose destination symbol is among the candidates. Finally, we
create a new TableJump section and write the table into it.
This is a continuation of PRs #77884 and #163142.
Co-authored-by: Craig Topper <craig.topper at sifive.com>
Co-authored-by: VincentWu <43398706+Xinlong-Wu at users.noreply.github.com>
Co-authored-by: Scott Egerton <9487234+ScottEgerton at users.noreply.github.com>
Co-authored-by: Robin Kastberg <Robin.Kastberg at iar.com>
Co-authored-by: Fangrui Song <i at maskray.me>
---
lld/ELF/Arch/RISCV.cpp | 278 ++++++++++++++++++++++++-
lld/ELF/Config.h | 2 +
lld/ELF/Driver.cpp | 1 +
lld/ELF/Options.td | 5 +
lld/test/ELF/riscv-no-tbljal-call.s | 34 +++
lld/test/ELF/riscv-tbljal-call.s | 52 +++++
lld/test/ELF/riscv-tbljal-many-jumps.s | 57 +++++
lld/test/ELF/riscv-tbljal-syms.s | 42 ++++
8 files changed, 465 insertions(+), 6 deletions(-)
create mode 100644 lld/test/ELF/riscv-no-tbljal-call.s
create mode 100644 lld/test/ELF/riscv-tbljal-call.s
create mode 100644 lld/test/ELF/riscv-tbljal-many-jumps.s
create mode 100644 lld/test/ELF/riscv-tbljal-syms.s
diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp
index 85f49c9260565..0ee226a79a996 100644
--- a/lld/ELF/Arch/RISCV.cpp
+++ b/lld/ELF/Arch/RISCV.cpp
@@ -9,6 +9,7 @@
#include "InputFiles.h"
#include "OutputSections.h"
#include "RelocScan.h"
+#include "SymbolTable.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
@@ -32,6 +33,7 @@ class RISCV final : public TargetInfo {
public:
RISCV(Ctx &);
uint32_t calcEFlags() const override;
+ void initTargetSpecificSections() override;
int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
void writeGotHeader(uint8_t *buf) const override;
void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
@@ -66,6 +68,33 @@ class RISCV final : public TargetInfo {
SmallVector<std::pair<uint64_t, uint64_t>, 0> synthesizedAligns;
};
+struct TableJumpEntry {
+ int saved;
+ int index;
+};
+
+// Used by RISC-V Zcmt table jump relaxation.
+class TableJumpSection final : public SyntheticSection {
+public:
+ TableJumpSection(Ctx &);
+ size_t getSize() const override;
+ void writeTo(uint8_t *buf) override;
+
+ void finalizeContents() override;
+ int getCMJTEntryIndex(const Symbol *sym) const;
+ int getCMJALTEntryIndex(const Symbol *sym) const;
+ void addEntry(const Symbol *sym, int saved, bool isCMJT);
+
+private:
+ static constexpr size_t maxCMJTEntrySize = 32;
+ static constexpr size_t maxCMJALTEntrySize = 224;
+ static constexpr size_t startCMJALTEntryIdx = 32;
+
+ // Candidate maps: symbol -> (total code size reduction, table index).
+ llvm::DenseMap<const Symbol *, TableJumpEntry> cmjtCandidates;
+ llvm::DenseMap<const Symbol *, TableJumpEntry> cmjaltCandidates;
+};
+
} // end anonymous namespace
// These are internal relocation numbers for GP/X0 relaxation. They aren't part
@@ -74,6 +103,7 @@ class RISCV final : public TargetInfo {
#define INTERNAL_R_RISCV_GPREL_S 257
#define INTERNAL_R_RISCV_X0REL_I 258
#define INTERNAL_R_RISCV_X0REL_S 259
+#define INTERNAL_R_RISCV_TBJAL 260
const uint64_t dtpOffset = 0x800;
@@ -189,6 +219,18 @@ uint32_t RISCV::calcEFlags() const {
return target;
}
+void RISCV::initTargetSpecificSections() {
+ if (ctx.arg.relaxTbljal) {
+ ctx.in.riscvTableJump = std::make_unique<TableJumpSection>(ctx);
+ ctx.inputSections.push_back(ctx.in.riscvTableJump.get());
+
+ Symbol *s = ctx.symtab->addSymbol(Defined{
+ ctx, /*file=*/ctx.internalFile, "__jvt_base$", STB_GLOBAL, STV_DEFAULT,
+ STT_NOTYPE, /*value=*/0, /*size=*/0, ctx.in.riscvTableJump.get()});
+ s->isUsedInRegularObj = true;
+ }
+}
+
int64_t RISCV::getImplicitAddend(const uint8_t *buf, RelType type) const {
switch (type) {
default:
@@ -541,6 +583,9 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
return;
}
+ case INTERNAL_R_RISCV_TBJAL:
+ return;
+
case R_RISCV_ADD8:
*loc += val;
return;
@@ -795,16 +840,44 @@ void elf::initSymbolAnchors(Ctx &ctx) {
}
}
+static bool relaxTableJump(Ctx &ctx, const InputSection &sec, size_t i,
+ uint64_t loc, Relocation &r, uint32_t &remove) {
+ if (!ctx.in.riscvTableJump)
+ return false;
+
+ uint32_t insn = read32le(sec.contentMaybeDecompress().data() + r.offset +
+ (r.type == R_RISCV_JAL ? 0 : 4));
+ uint8_t rd = extractBits(insn, 11, 7);
+ int tblEntryIndex = -1;
+ auto &tableJump = static_cast<TableJumpSection &>(*ctx.in.riscvTableJump);
+ if (rd == X_X0)
+ tblEntryIndex = tableJump.getCMJTEntryIndex(r.sym);
+ else if (rd == X_RA)
+ tblEntryIndex = tableJump.getCMJALTEntryIndex(r.sym);
+
+ if (tblEntryIndex < 0)
+ return false;
+ sec.relaxAux->relocTypes[i] = INTERNAL_R_RISCV_TBJAL;
+ sec.relaxAux->writes.push_back(0xA002 |
+ (tblEntryIndex << 2)); // cm.jt or cm.jalt
+ remove = r.type == R_RISCV_JAL ? 2 : 6;
+ return true;
+}
+
+static int64_t getCallDisplace(Ctx &ctx, const Relocation &r, uint64_t loc) {
+ const Symbol &sym = *r.sym;
+ const uint64_t dest =
+ (r.expr == R_PLT_PC ? sym.getPltVA(ctx) : sym.getVA(ctx)) + r.addend;
+ return dest - loc;
+}
+
// Relax R_RISCV_CALL/R_RISCV_CALL_PLT auipc+jalr to c.j, c.jal, or jal.
static void relaxCall(Ctx &ctx, const InputSection &sec, size_t i, uint64_t loc,
Relocation &r, uint32_t &remove) {
const bool rvc = getEFlags(ctx, sec.file) & EF_RISCV_RVC;
- const Symbol &sym = *r.sym;
const uint64_t insnPair = read64le(sec.content().data() + r.offset);
const uint32_t rd = extractBits(insnPair, 32 + 11, 32 + 7);
- const uint64_t dest =
- (r.expr == R_PLT_PC ? sym.getPltVA(ctx) : sym.getVA(ctx)) + r.addend;
- const int64_t displace = dest - loc;
+ const int64_t displace = getCallDisplace(ctx, r, loc);
// When the caller specifies the old value of `remove`, disallow its
// increment.
@@ -817,6 +890,9 @@ static void relaxCall(Ctx &ctx, const InputSection &sec, size_t i, uint64_t loc,
sec.relaxAux->relocTypes[i] = R_RISCV_RVC_JUMP;
sec.relaxAux->writes.push_back(0x2001); // c.jal
remove = 6;
+ } else if (remove >= (r.type == R_RISCV_JAL ? 2 : 6) &&
+ relaxTableJump(ctx, sec, i, loc, r, remove)) {
+ // relaxTableJump sets remove
} else if (remove >= 4 && isInt<21>(displace)) {
sec.relaxAux->relocTypes[i] = R_RISCV_JAL;
sec.relaxAux->writes.push_back(0x6f | rd << 7); // jal
@@ -940,6 +1016,10 @@ static bool relax(Ctx &ctx, int pass, InputSection &sec) {
relaxCall(ctx, sec, i, loc, r, remove);
}
break;
+ case R_RISCV_JAL:
+ if (relaxable(relocs, i))
+ relaxTableJump(ctx, sec, i, loc, r, remove);
+ break;
case R_RISCV_TPREL_HI20:
case R_RISCV_TPREL_ADD:
case R_RISCV_TPREL_LO12_I:
@@ -999,6 +1079,46 @@ static bool relax(Ctx &ctx, int pass, InputSection &sec) {
return changed;
}
+static void scanTableJumpEntries(Ctx &ctx, const InputSection &sec) {
+ for (auto [i, r] : llvm::enumerate(sec.relocations)) {
+ if (!r.sym->isDefined())
+ continue;
+ if (i + 1 == sec.relocs().size() ||
+ sec.relocs()[i + 1].type != R_RISCV_RELAX)
+ continue;
+ switch (r.type) {
+ case R_RISCV_JAL:
+ case R_RISCV_CALL:
+ case R_RISCV_CALL_PLT: {
+ uint32_t insn = read32le(sec.contentMaybeDecompress().data() + r.offset +
+ (r.type == R_RISCV_JAL ? 0 : 4));
+ uint8_t rd = extractBits(insn, 11, 7);
+
+ // Skip the jal/j which can be potentially relaxed to c.jal/c.j. No need
+ // to waste the limited jumptable entries on them.
+ const uint64_t loc = sec.getVA() + r.offset;
+ const int64_t displace = getCallDisplace(ctx, r, loc);
+ const bool rvc = getEFlags(ctx, sec.file) & EF_RISCV_RVC;
+ if (rvc && isInt<12>(displace)) {
+ if ((rd == X_X0) || (rd == X_RA && !ctx.arg.is64))
+ continue;
+ }
+
+ // If the jal/j can be relaxed to a 32-bit instruction, the saving becomes
+ // actually 2 bytes (4->2), otherwise it's 6 bytes (8->2)
+ int saved = isInt<21>(displace) ? 2 : 6;
+
+ auto &tableJump = static_cast<TableJumpSection &>(*ctx.in.riscvTableJump);
+ if (rd == X_X0)
+ tableJump.addEntry(r.sym, saved, /*isCMJT=*/true);
+ else if (rd == X_RA)
+ tableJump.addEntry(r.sym, saved, /*isCMJT=*/false);
+ break;
+ }
+ }
+ }
+}
+
// When relaxing just R_RISCV_ALIGN, relocDeltas is usually changed only once in
// the absence of a linker script. For call and load/store R_RISCV_RELAX, code
// shrinkage may reduce displacement and make more relocations eligible for
@@ -1008,10 +1128,23 @@ static bool relax(Ctx &ctx, int pass, InputSection &sec) {
// relaxation pass.
bool RISCV::relaxOnce(int pass) const {
llvm::TimeTraceScope timeScope("RISC-V relaxOnce");
- if (pass == 0)
+ SmallVector<InputSection *, 0> storage;
+ if (pass == 0) {
initSymbolAnchors(ctx);
- SmallVector<InputSection *, 0> storage;
+ // Only do scan/finalize once before all relaxations
+ if (ctx.in.riscvTableJump) {
+ // scan all relocations and build the Zcmt jump table
+ for (OutputSection *osec : ctx.outputSections) {
+ if (!(osec->flags & SHF_EXECINSTR))
+ continue;
+ for (InputSection *sec : getInputSections(*osec, storage))
+ scanTableJumpEntries(ctx, *sec);
+ }
+ ctx.in.riscvTableJump->finalizeContents();
+ }
+ }
+
bool changed = false;
for (OutputSection *osec : ctx.outputSections) {
if (!(osec->flags & SHF_EXECINSTR))
@@ -1194,6 +1327,14 @@ void RISCV::finalizeRelax(int passes) const {
case INTERNAL_R_RISCV_X0REL_I:
case INTERNAL_R_RISCV_X0REL_S:
break;
+ case INTERNAL_R_RISCV_TBJAL:
+ assert(ctx.arg.relaxTbljal &&
+ "TBJAL relocation without --relax-tbljal");
+ assert((aux.writes[writesIdx] & 0xfc03) == 0xA002 &&
+ "malformed cm.jt/cm.jalt encoding");
+ skip = 2;
+ write16le(p, aux.writes[writesIdx++]);
+ break;
case R_RISCV_RELAX:
// Used by relaxTlsLe to indicate the relocation is ignored.
break;
@@ -1532,3 +1673,128 @@ void elf::mergeRISCVAttributesSections(Ctx &ctx) {
}
void elf::setRISCVTargetInfo(Ctx &ctx) { ctx.target.reset(new RISCV(ctx)); }
+
+TableJumpSection::TableJumpSection(Ctx &ctx)
+ : SyntheticSection(ctx, ".riscv.jvt", SHT_PROGBITS, SHF_ALLOC,
+ /*alignment=*/64) {}
+
+int TableJumpSection::getCMJTEntryIndex(const Symbol *sym) const {
+ auto it = cmjtCandidates.find(sym);
+ return it != cmjtCandidates.end() ? it->second.index : -1;
+}
+
+int TableJumpSection::getCMJALTEntryIndex(const Symbol *sym) const {
+ auto it = cmjaltCandidates.find(sym);
+ return it != cmjaltCandidates.end() ? startCMJALTEntryIdx + it->second.index
+ : -1;
+}
+
+void TableJumpSection::addEntry(const Symbol *sym, int saved, bool isCMJT) {
+ auto &candidates = isCMJT ? cmjtCandidates : cmjaltCandidates;
+ if (!candidates.count(sym))
+ candidates[sym] = TableJumpEntry{/*saved=*/0, /*index=*/-1};
+ candidates[sym].saved += saved;
+}
+
+// Sort candidates by code size reduction (descending), truncate to maxSize,
+// and drop entries whose reduction doesn't cover the table entry cost.
+static void
+selectEntries(Ctx &ctx,
+ llvm::DenseMap<const Symbol *, TableJumpEntry> &candidates,
+ uint32_t maxSize) {
+ SmallVector<std::pair<const Symbol *, TableJumpEntry>, 0> entries(
+ candidates.begin(), candidates.end());
+ llvm::sort(entries, [](const auto &a, const auto &b) {
+ return a.second.saved > b.second.saved;
+ });
+ if (entries.size() > maxSize)
+ entries.resize(maxSize);
+ // Drop entries that don't save enough to cover the table entry cost.
+ while (!entries.empty() &&
+ entries.back().second.saved < (int)ctx.arg.wordsize)
+ entries.pop_back();
+
+ // Fill the index back to the map
+ for (auto [i, entry] : llvm::enumerate(entries))
+ candidates[entry.first].index = (int)i;
+
+ // Remove the candidates whose index == -1, which means not selected
+ // (llvm::erase_if does not seems to work on DenseMap)
+ for (auto it = candidates.begin(), end = candidates.end(); it != end;) {
+ if (it->second.index >= 0)
+ ++it;
+ else
+ candidates.erase(it++);
+ }
+}
+
+void TableJumpSection::finalizeContents() {
+ // cleanup the candidates by removing non-interesting ones and picking
+ // the most profitable ones (since the list size is quite limited)
+ selectEntries(ctx, cmjtCandidates, maxCMJTEntrySize);
+ selectEntries(ctx, cmjaltCandidates, maxCMJALTEntrySize);
+
+ // We have three choices here:
+ // 1. adopt both cm.jt/cm.jalt
+ // 2. only adopt cm.jt
+ // 3. give up
+ // (cm.jalt only is meaningless, since the table size is the same as #1)
+
+ int savedBoth =
+ (startCMJALTEntryIdx + cmjaltCandidates.size()) * ctx.arg.wordsize;
+ int savedCMJTOnly = cmjtCandidates.size() * ctx.arg.wordsize;
+ savedBoth = -savedBoth;
+ savedCMJTOnly = -savedCMJTOnly;
+
+ for (auto &[sym, entry] : cmjtCandidates) {
+ savedCMJTOnly += entry.saved;
+ savedBoth += entry.saved;
+ }
+ for (auto &[sym, entry] : cmjaltCandidates)
+ savedBoth += entry.saved;
+
+ // Using cm.jalt requires padding the cm.jt region to 32 entries.
+ // Drop cm.jalt if the padding cost exceeds cm.jalt's benefit.
+ if (!cmjaltCandidates.empty() && savedBoth < savedCMJTOnly)
+ cmjaltCandidates.clear();
+
+ // If overall code size doesn't decrease, give up entirely.
+ if (savedCMJTOnly <= 0) {
+ Log(ctx) << "table jump relaxation didn't reduce code size";
+ cmjtCandidates.clear();
+ cmjaltCandidates.clear();
+ }
+}
+
+size_t TableJumpSection::getSize() const {
+ if (!cmjaltCandidates.empty())
+ return (startCMJALTEntryIdx + cmjaltCandidates.size()) * ctx.arg.wordsize;
+ return cmjtCandidates.size() * ctx.arg.wordsize;
+}
+
+static void
+writeEntries(Ctx &ctx, uint8_t *buf,
+ const llvm::DenseMap<const Symbol *, TableJumpEntry> &candidates) {
+ // Order the candidates by their indexes
+ SmallVector<std::pair<const Symbol *, TableJumpEntry>, 0> entries(
+ candidates.begin(), candidates.end());
+ llvm::sort(entries, [](const auto &a, const auto &b) {
+ return a.second.index < b.second.index;
+ });
+ for (auto &[sym, entry] : entries) {
+ uint64_t va = sym->getVA(ctx);
+ if (ctx.arg.is64)
+ write64le(buf, va);
+ else
+ write32le(buf, va);
+ buf += ctx.arg.wordsize;
+ }
+}
+
+void TableJumpSection::writeTo(uint8_t *buf) {
+ if (!cmjtCandidates.empty())
+ writeEntries(ctx, buf, cmjtCandidates);
+ if (!cmjaltCandidates.empty())
+ writeEntries(ctx, buf + startCMJALTEntryIdx * ctx.arg.wordsize,
+ cmjaltCandidates);
+}
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index a9f74460f6f99..abc6b19daa116 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -367,6 +367,7 @@ struct Config {
bool resolveGroups;
bool relrGlibc = false;
bool relrPackDynRelocs = false;
+ bool relaxTbljal;
llvm::DenseSet<llvm::StringRef> saveTempsArgs;
llvm::SmallVector<std::pair<llvm::GlobPattern, uint32_t>, 0> shuffleSections;
bool singleRoRx;
@@ -574,6 +575,7 @@ struct InStruct {
std::unique_ptr<RelroPaddingSection> relroPadding;
std::unique_ptr<SyntheticSection> armCmseSGSection;
std::unique_ptr<PPC64LongBranchTargetSection> ppc64LongBranchTarget;
+ std::unique_ptr<SyntheticSection> riscvTableJump;
std::unique_ptr<SyntheticSection> mipsAbiFlags;
std::unique_ptr<MipsGotSection> mipsGot;
std::unique_ptr<SyntheticSection> mipsOptions;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index d7bfa7357d4ed..72743bbffa3ee 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1529,6 +1529,7 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
ctx.arg.rejectMismatch = !args.hasArg(OPT_no_warn_mismatch);
ctx.arg.relax = args.hasFlag(OPT_relax, OPT_no_relax, true);
ctx.arg.relaxGP = args.hasFlag(OPT_relax_gp, OPT_no_relax_gp, false);
+ ctx.arg.relaxTbljal = args.hasArg(OPT_relax_tbljal);
ctx.arg.rpath = getRpath(args);
ctx.arg.relocatable = args.hasArg(OPT_relocatable);
ctx.arg.resolveGroups =
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index c2111e58c12b9..c7a0b86d59f2b 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -423,6 +423,11 @@ defm relax_gp: BB<"relax-gp",
"Enable global pointer relaxation",
"Disable global pointer relaxation (default)">;
+def relax_tbljal : FF<"relax-tbljal">,
+ HelpText<"Enable conversion of call instructions to table "
+ "jump instruction from the Zcmt extension for "
+ "frequently called functions (RISC-V only)">;
+
defm remap_inputs: EEq<"remap-inputs",
"Remap input files matching <from-glob> to <to-file>">,
MetaVarName<"<from-glob>=<to-file>">;
diff --git a/lld/test/ELF/riscv-no-tbljal-call.s b/lld/test/ELF/riscv-no-tbljal-call.s
new file mode 100644
index 0000000000000..8b3a41d463972
--- /dev/null
+++ b/lld/test/ELF/riscv-no-tbljal-call.s
@@ -0,0 +1,34 @@
+# REQUIRES: riscv
+
+## When there are too few calls, table jump relaxation should not be profitable.
+## Verify the .riscv.jvt section has zero size and no cm.jt/cm.jalt are emitted.
+
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+relax,+zcmt %s -o %t.rv32.o
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax,+zcmt %s -o %t.rv64.o
+# RUN: ld.lld %t.rv32.o --relax-tbljal --defsym foo=0x150000 -o %t.rv32
+# RUN: ld.lld %t.rv64.o --relax-tbljal --defsym foo=0x150000 -o %t.rv64
+# RUN: llvm-readelf -S %t.rv32 | FileCheck --check-prefix=SEC32 %s
+# RUN: llvm-readelf -S %t.rv64 | FileCheck --check-prefix=SEC64 %s
+# RUN: llvm-objdump -d --mattr=+zcmt --no-show-raw-insn %t.rv32 | FileCheck --check-prefix=DISASM %s
+# RUN: llvm-objdump -d --mattr=+zcmt --no-show-raw-insn %t.rv64 | FileCheck --check-prefix=DISASM %s
+
+# SEC32: .riscv.jvt PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000000
+# SEC64: .riscv.jvt PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000000
+
+# DISASM-NOT: cm.jt
+# DISASM-NOT: cm.jalt
+
+.global _start
+.p2align 3
+_start:
+ call foo
+ tail foo_1
+ tail foo_2
+ tail foo_3
+
+foo_1:
+ nop
+foo_2:
+ nop
+foo_3:
+ nop
diff --git a/lld/test/ELF/riscv-tbljal-call.s b/lld/test/ELF/riscv-tbljal-call.s
new file mode 100644
index 0000000000000..23f510cbb7228
--- /dev/null
+++ b/lld/test/ELF/riscv-tbljal-call.s
@@ -0,0 +1,52 @@
+# REQUIRES: riscv
+
+## Test that call/tail instructions are relaxed to cm.jt/cm.jalt when
+## --relax-tbljal is enabled and the table jump is profitable.
+
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+relax,+zcmt %s -o %t.rv32.o
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax,+zcmt %s -o %t.rv64.o
+# RUN: ld.lld %t.rv32.o --relax-tbljal --defsym foo=0x150000 --defsym foo_1=0x150010 --defsym foo_3=0x150030 -o %t.rv32
+# RUN: ld.lld %t.rv64.o --relax-tbljal --defsym foo=0x150000 --defsym foo_1=0x150010 --defsym foo_3=0x150030 -o %t.rv64
+
+## Check disassembly for cm.jalt (rd=ra) and cm.jt (rd=zero).
+# RUN: llvm-objdump -d -M no-aliases --mattr=+zcmt --no-show-raw-insn %t.rv32 | FileCheck --check-prefix=RV32 %s
+# RUN: llvm-objdump -d -M no-aliases --mattr=+zcmt --no-show-raw-insn %t.rv64 | FileCheck --check-prefix=RV64 %s
+
+## Check jump table contents.
+# RUN: llvm-readelf -x .riscv.jvt %t.rv32 | FileCheck --check-prefix=JVT32 %s
+# RUN: llvm-readelf -x .riscv.jvt %t.rv64 | FileCheck --check-prefix=JVT64 %s
+
+## 21 calls to foo become cm.jalt (RV32), tails become cm.jt.
+# RV32-COUNT-21: cm.jalt
+# RV32: cm.jt
+# RV32: cm.jt
+# RV32: cm.jt
+# RV32: cm.jt
+
+# RV64: cm.jt
+# RV64: cm.jt
+# RV64: cm.jt
+
+## Verify table entries contain the target addresses (little-endian).
+# JVT32: 30001500 10001500 00001500
+# JVT64: 30001500 00000000 10001500 00000000
+
+.global _start
+.p2align 3
+_start:
+ .rept 21
+ call foo
+ .endr
+ tail foo
+ tail foo_1
+ tail foo_1
+ tail foo_1
+ tail foo_3
+ tail foo_2
+ tail foo_3
+ tail foo_3
+ tail foo_3
+ tail foo_3
+
+foo_2:
+ nop
diff --git a/lld/test/ELF/riscv-tbljal-many-jumps.s b/lld/test/ELF/riscv-tbljal-many-jumps.s
new file mode 100644
index 0000000000000..33f9042326eb0
--- /dev/null
+++ b/lld/test/ELF/riscv-tbljal-many-jumps.s
@@ -0,0 +1,57 @@
+# REQUIRES: riscv
+
+## Test table jump with many targets filling the cm.jt table (32 entries).
+## Verify the .riscv.jvt section size accounts for all entries.
+
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+relax,+zcmt -asm-macro-max-nesting-depth=33 %s -o %t.rv32.o
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax,+zcmt -asm-macro-max-nesting-depth=33 %s -o %t.rv64.o
+
+## Use --defsym for targets beyond c.j range so they can only be relaxed via table jump.
+# RUN: ld.lld %t.rv32.o --relax-tbljal \
+# RUN: --defsym t0=0x100000 --defsym t1=0x100010 --defsym t2=0x100020 --defsym t3=0x100030 \
+# RUN: --defsym t4=0x100040 --defsym t5=0x100050 --defsym t6=0x100060 --defsym t7=0x100070 \
+# RUN: --defsym t8=0x100080 --defsym t9=0x100090 --defsym t10=0x1000a0 --defsym t11=0x1000b0 \
+# RUN: --defsym t12=0x1000c0 --defsym t13=0x1000d0 --defsym t14=0x1000e0 --defsym t15=0x1000f0 \
+# RUN: --defsym t16=0x100100 --defsym t17=0x100110 --defsym t18=0x100120 --defsym t19=0x100130 \
+# RUN: --defsym t20=0x100140 --defsym t21=0x100150 --defsym t22=0x100160 --defsym t23=0x100170 \
+# RUN: --defsym t24=0x100180 --defsym t25=0x100190 --defsym t26=0x1001a0 --defsym t27=0x1001b0 \
+# RUN: --defsym t28=0x1001c0 --defsym t29=0x1001d0 --defsym t30=0x1001e0 --defsym t31=0x1001f0 \
+# RUN: -o %t.rv32
+# RUN: ld.lld %t.rv64.o --relax-tbljal \
+# RUN: --defsym t0=0x100000 --defsym t1=0x100010 --defsym t2=0x100020 --defsym t3=0x100030 \
+# RUN: --defsym t4=0x100040 --defsym t5=0x100050 --defsym t6=0x100060 --defsym t7=0x100070 \
+# RUN: --defsym t8=0x100080 --defsym t9=0x100090 --defsym t10=0x1000a0 --defsym t11=0x1000b0 \
+# RUN: --defsym t12=0x1000c0 --defsym t13=0x1000d0 --defsym t14=0x1000e0 --defsym t15=0x1000f0 \
+# RUN: --defsym t16=0x100100 --defsym t17=0x100110 --defsym t18=0x100120 --defsym t19=0x100130 \
+# RUN: --defsym t20=0x100140 --defsym t21=0x100150 --defsym t22=0x100160 --defsym t23=0x100170 \
+# RUN: --defsym t24=0x100180 --defsym t25=0x100190 --defsym t26=0x1001a0 --defsym t27=0x1001b0 \
+# RUN: --defsym t28=0x1001c0 --defsym t29=0x1001d0 --defsym t30=0x1001e0 --defsym t31=0x1001f0 \
+# RUN: -o %t.rv64
+
+# RUN: llvm-readelf -S %t.rv32 | FileCheck --check-prefix=SEC32 %s
+# RUN: llvm-readelf -S %t.rv64 | FileCheck --check-prefix=SEC64 %s
+# RUN: llvm-objdump -d --mattr=+zcmt --no-show-raw-insn %t.rv32 | FileCheck --check-prefix=DISASM %s
+
+## 32 entries * 4 bytes = 0x80; 32 entries * 8 bytes = 0x100.
+# SEC32: .riscv.jvt PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000080
+# SEC64: .riscv.jvt PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000100
+
+## Verify some instructions were converted.
+# DISASM: cm.jt
+
+.global _start
+.p2align 3
+_start:
+## Use enough repetitions per target so that the savings (2 bytes per tail on
+## RV64 after jal relaxation) exceed the table entry cost (8 bytes on RV64).
+.altmacro
+.macro iota n, i=0
+.if \n-\i
+ .rept 6
+ tail t\i
+ .endr
+ iota \n, %(\i+1)
+.endif
+.endm
+
+iota 32
diff --git a/lld/test/ELF/riscv-tbljal-syms.s b/lld/test/ELF/riscv-tbljal-syms.s
new file mode 100644
index 0000000000000..dea036c59b421
--- /dev/null
+++ b/lld/test/ELF/riscv-tbljal-syms.s
@@ -0,0 +1,42 @@
+# REQUIRES: riscv
+
+## Check that relaxation correctly adjusts symbol addresses and sizes.
+
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+relax,+zcmt %s -o %t.rv32.o
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax,+zcmt %s -o %t.rv64.o
+# RUN: ld.lld -Ttext=0x100000 --relax-tbljal %t.rv32.o -o %t.rv32
+# RUN: ld.lld -Ttext=0x100000 --relax-tbljal %t.rv64.o -o %t.rv64
+
+# RUN: llvm-readelf -s %t.rv32 | FileCheck --check-prefix=CHECK32 %s
+# RUN: llvm-readelf -s %t.rv64 | FileCheck --check-prefix=CHECK64 %s
+
+# CHECK32: 00100000 4 NOTYPE LOCAL DEFAULT 1 a
+# CHECK32-NEXT: 00100000 6 NOTYPE LOCAL DEFAULT 1 b
+# CHECK32-NEXT: 00100000 0 NOTYPE LOCAL DEFAULT 1 $x
+# CHECK32-NEXT: 00100004 2 NOTYPE LOCAL DEFAULT 1 c
+# CHECK32-NEXT: 00100004 6 NOTYPE LOCAL DEFAULT 1 d
+# CHECK32-NEXT: 00100000 10 NOTYPE GLOBAL DEFAULT 1 _start
+# CHECK32: NOTYPE GLOBAL DEFAULT {{.*}} __jvt_base$
+
+# CHECK64: 00100000 4 NOTYPE LOCAL DEFAULT 1 a
+# CHECK64-NEXT: 00100000 8 NOTYPE LOCAL DEFAULT 1 b
+# CHECK64-NEXT: 00100000 0 NOTYPE LOCAL DEFAULT 1 $x
+# CHECK64-NEXT: 00100004 4 NOTYPE LOCAL DEFAULT 1 c
+# CHECK64-NEXT: 00100004 8 NOTYPE LOCAL DEFAULT 1 d
+# CHECK64-NEXT: 00100000 12 NOTYPE GLOBAL DEFAULT 1 _start
+# CHECK64: NOTYPE GLOBAL DEFAULT {{.*}} __jvt_base$
+
+.global _start
+_start:
+a:
+b:
+ add a0, a1, a2
+.size a, . - a
+c:
+d:
+ call _start
+.size b, . - b
+.size c, . - c
+ add a0, a1, a2
+.size d, . - d
+.size _start, . - _start
More information about the llvm-commits
mailing list