[lld] [RISCV][LLD] Add RISCV zcmt optimise in linker relaxation (PR #77884)

Fri Feb 2 01:20:47 PST 2024

================
@@ -1071,3 +1128,212 @@ TargetInfo *elf::getRISCVTargetInfo() {
   static RISCV target;
   return &target;
 }
+
+// Constructs the synthetic ".riscv.jvt" section with flags
+// SHF_ALLOC | SHF_EXECINSTR and type SHT_PROGBITS. config->wordsize is passed
+// as the third SyntheticSection argument (presumably the section alignment;
+// confirm against the SyntheticSection constructor).
+TableJumpSection::TableJumpSection()
+    : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS,
+                       config->wordsize, ".riscv.jvt") {}
+
+// Registers `symbol` as a CMJT entry candidate: `csReduction` is added to the
+// running total kept for that symbol in CMJTEntryCandidates.
+void TableJumpSection::addCMJTEntryCandidate(const Symbol *symbol,
+                                             int csReduction) {
+  addEntry(symbol, CMJTEntryCandidates, csReduction);
+}
+
+// Returns the table index of `symbol` among the finalized CMJT entries
+// (its position in the list offset by startCMJTEntryIdx), or -1 when the
+// symbol has no finalized CMJT entry.
+int TableJumpSection::getCMJTEntryIndex(const Symbol *symbol) {
+  const uint32_t pos =
+      getIndex(symbol, maxCMJTEntrySize, finalizedCMJTEntries);
+  // A position at or past the end of the list means "not found".
+  if (!(pos < finalizedCMJTEntries.size()))
+    return -1;
+  return static_cast<int>(startCMJTEntryIdx + pos);
+}
+
+// Registers `symbol` as a CMJALT entry candidate: `csReduction` is added to
+// the running total kept for that symbol in CMJALTEntryCandidates.
+void TableJumpSection::addCMJALTEntryCandidate(const Symbol *symbol,
+                                               int csReduction) {
+  addEntry(symbol, CMJALTEntryCandidates, csReduction);
+}
+
+// Returns the table index of `symbol` among the finalized CMJALT entries
+// (its position in the list offset by startCMJALTEntryIdx), or -1 when the
+// symbol has no finalized CMJALT entry.
+int TableJumpSection::getCMJALTEntryIndex(const Symbol *symbol) {
+  const uint32_t pos =
+      getIndex(symbol, maxCMJALTEntrySize, finalizedCMJALTEntries);
+  // A position at or past the end of the list means "not found".
+  if (!(pos < finalizedCMJALTEntries.size()))
+    return -1;
+  return static_cast<int>(startCMJALTEntryIdx + pos);
+}
+
+// Accumulates `csReduction` into the total stored for `symbol` in
+// `entriesList`, creating the entry with a total of zero on first use.
+void TableJumpSection::addEntry(
+    const Symbol *symbol, llvm::DenseMap<const Symbol *, int> &entriesList,
+    int csReduction) {
+  auto slot = entriesList.try_emplace(symbol, 0).first;
+  slot->second += csReduction;
+}
+
+// Returns the position of `symbol` within `entriesList`, or
+// entriesList.size() when the symbol is not in the list. `maxSize` is only
+// used to assert that the list has not outgrown its limit.
+uint32_t TableJumpSection::getIndex(
+    const Symbol *symbol, uint32_t maxSize,
+    SmallVector<llvm::detail::DenseMapPair<const Symbol *, int>, 0>
+        &entriesList) {
+  assert(maxSize >= entriesList.size() &&
+         "Finalized vector of entries exceeds maximum");
+
+  const uint32_t count = entriesList.size();
+  for (uint32_t pos = 0; pos != count; ++pos)
+    if (entriesList[pos].first == symbol)
+      return pos;
+  // Not found: report one past the last valid position.
+  return count;
+}
+
+// Scans the relocations of `sec` and registers every relaxable jump/call
+// (R_RISCV_JAL, R_RISCV_CALL, R_RISCV_CALL_PLT) to a defined symbol as a
+// table jump candidate, weighted by `csReduction` for that call site.
+// Sites whose jump instruction writes x0 become CMJT candidates, sites that
+// write ra become CMJALT candidates; any other destination register is
+// ignored.
+void TableJumpSection::scanTableJumpEntries(const InputSection &sec) const {
+  for (auto [i, r] : llvm::enumerate(sec.relocations)) {
+    if (!isa<Defined>(r.sym))
+      continue;
+    // Only relocations immediately followed by R_RISCV_RELAX are candidates.
+    if (i + 1 == sec.relocs().size() ||
+        sec.relocs()[i + 1].type != R_RISCV_RELAX)
+      continue;
+    switch (r.type) {
+    case R_RISCV_JAL:
+    case R_RISCV_CALL:
+    case R_RISCV_CALL_PLT: {
+      // Sites already relaxed to a compressed jump gain nothing; sites relaxed
+      // to JAL are credited 2 bytes, everything else 6 bytes.
+      // NOTE(review): a relocation whose original type is R_RISCV_JAL and
+      // that has no recorded relaxation is also credited 6 here -- confirm
+      // that this is intended.
+      int csReduction = 6;
+      if (sec.relaxAux->relocTypes[i] == R_RISCV_RVC_JUMP)
+        continue;
+      if (sec.relaxAux->relocTypes[i] == R_RISCV_JAL)
+        csReduction = 2;
+
+      // The destination register is the rd field (bits 11:7) of the
+      // instruction that performs the jump, so the whole 32-bit instruction
+      // word must be read, not just its first byte. For R_RISCV_JAL that
+      // instruction is the jal at r.offset; for the call relocations it is
+      // the jalr that follows the 4-byte auipc.
+      const uint64_t jumpOffset = r.offset + (r.type == R_RISCV_JAL ? 0 : 4);
+      const uint32_t insn =
+          read32le(sec.contentMaybeDecompress().data() + jumpOffset);
+      const uint8_t rd = extractBits(insn, 11, 7);
+
+      if (rd == 0)
+        in.riscvTableJumpSection->addCMJTEntryCandidate(r.sym, csReduction);
+      else if (rd == X_RA)
+        in.riscvTableJumpSection->addCMJALTEntryCandidate(r.sym, csReduction);
+      break;
+    }
+    default:
+      break;
+    }
+  }
+}
+
+// Turns the candidate maps into the final CMJT/CMJALT entry lists. Runs its
+// body only once; later calls return immediately. Entry lists that do not
+// yield a positive overall size reduction are discarded.
+void TableJumpSection::finalizeContents() {
+  if (isFinalized)
+    return;
+  isFinalized = true;
+
+  finalizedCMJTEntries = finalizeEntry(CMJTEntryCandidates, maxCMJTEntrySize);
+  finalizedCMJALTEntries =
+      finalizeEntry(CMJALTEntryCandidates, maxCMJALTEntrySize);
+  CMJALTEntryCandidates.clear();
+  CMJTEntryCandidates.clear();
+
+  // First try dropping only the cm.jalt entries if the table as a whole does
+  // not reduce code size.
+  if (!finalizedCMJALTEntries.empty() && getSizeReduction() <= 0)
+    finalizedCMJALTEntries.clear();
+
+  // With a positive reduction there is nothing more to do.
+  if (getSizeReduction() > 0)
+    return;
+
+  // Still no gain: give up on table jumps entirely.
+  warn("Table Jump Relaxation didn't got any reduction for code size.");
+  finalizedCMJTEntries.clear();
+}
+
+// Builds the final entry list from a candidate map:
+//  1. order the candidates by decreasing code size reduction;
+//  2. keep at most `maxSize` of them, dropping from the tail, where the
+//     reduction is smallest;
+//  3. drop trailing entries whose reduction is smaller than config->wordsize,
+//     i.e. entries that would not pay for the table slot they occupy.
+// NOTE(review): entries with equal reduction keep whatever relative order
+// std::sort produces from the DenseMap iteration order -- confirm that the
+// resulting table layout is deterministic across runs.
+SmallVector<llvm::detail::DenseMapPair<const Symbol *, int>, 0>
+TableJumpSection::finalizeEntry(llvm::DenseMap<const Symbol *, int> EntryMap,
+                                uint32_t maxSize) {
+  using Entry = llvm::detail::DenseMapPair<const Symbol *, int>;
+
+  SmallVector<Entry, 0> ranked(EntryMap.begin(), EntryMap.end());
+  std::sort(ranked.begin(), ranked.end(),
+            [](const Entry &lhs, const Entry &rhs) {
+              return lhs.second > rhs.second;
+            });
+
+  // Keep only the `maxSize` most profitable candidates.
+  if (ranked.size() > maxSize)
+    ranked.erase(ranked.begin() + maxSize, ranked.end());
+
+  // The list is sorted, so unprofitable entries can only sit at the tail.
+  while (!ranked.empty() && ranked.back().second < config->wordsize)
+    ranked.pop_back();
+  return ranked;
+}
+
+// Returns the size of the table in bytes: (index one past the last used
+// entry) * config->wordsize. Before finalization the candidate maps are
+// counted; afterwards the finalized lists are. When there is at least one
+// CMJALT entry the table extends to startCMJALTEntryIdx plus the number of
+// CMJALT entries; otherwise only the CMJT range is counted.
+size_t TableJumpSection::getSize() const {
+  if (!isFinalized) {
+    if (CMJALTEntryCandidates.empty())
+      return (startCMJTEntryIdx + CMJTEntryCandidates.size()) *
+             config->wordsize;
+    return (startCMJALTEntryIdx + CMJALTEntryCandidates.size()) *
+           config->wordsize;
+  }
+
+  if (finalizedCMJALTEntries.empty())
+    return (startCMJTEntryIdx + finalizedCMJTEntries.size()) * config->wordsize;
+  return (startCMJALTEntryIdx + finalizedCMJALTEntries.size()) *
+         config->wordsize;
+}
+
+int32_t TableJumpSection::getSizeReduction() {
----------------
JackGittes wrote:

OK, I understand that cm.jalt's targets start at startCMJALTEntryIdx, after a 32 * wordsize space reserved for cm.jt entries, which is why getSizeReduction behaves like this. But considering that cm.jt entries are usually fewer than cm.jalt entries, so that the first 32 entries are generally not full, would it be better to split the .riscv.jvt section into jt/jalt sub-sections and calculate their reductions separately?

https://github.com/llvm/llvm-project/pull/77884