[llvm] [BOLT] Add support for safe-icf (PR #116275)

Alexander Yermolovich via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 10 16:30:59 PST 2024


================
@@ -340,6 +354,113 @@ typedef std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>,
 
 namespace llvm {
 namespace bolt {
+/// Scans symbol table and creates a bit vector of memory addresses of vtables.
+static void processSymbolTable(const BinaryContext &BC,
+                               llvm::BitVector &BitVector) {
+  for (auto &[Address, Data] : BC.getBinaryData()) {
+    // Filter out all symbols that are not vtables.
+    if (!Data->getName().starts_with("_ZTV"))
+      continue;
+    for (uint64_t I = Address / 8, End = I + (Data->getSize() / 8); I < End;
+         ++I)
+      BitVector.set(I);
+  }
+}
+Error IdenticalCodeFolding::processDataRelocations(
+    BinaryContext &BC, const SectionRef &SecRefRelData,
+    const llvm::BitVector &BitVector, const bool HasAddressTaken) {
+  for (const RelocationRef &Rel : SecRefRelData.relocations()) {
+    if (BitVector.test(Rel.getOffset() / 8))
+      continue;
+    symbol_iterator SymbolIter = Rel.getSymbol();
+    const ObjectFile *OwningObj = Rel.getObject();
+    assert(SymbolIter != OwningObj->symbol_end() &&
+           "relocation Symbol expected");
+    const SymbolRef &Symbol = *SymbolIter;
+    const uint64_t SymbolAddress = cantFail(Symbol.getAddress());
+    const ELFObjectFileBase *ELFObj = dyn_cast<ELFObjectFileBase>(OwningObj);
+    if (!ELFObj)
+      return createFatalBOLTError(
+          Twine("BOLT-ERROR: Only ELFObjectFileBase is supported"));
+    const int64_t Addend = getRelocationAddend(ELFObj, Rel);
+    BinaryFunction *BF = BC.getBinaryFunctionAtAddress(SymbolAddress + Addend);
+    if (!BF)
+      continue;
+    BF->setHasAddressTaken(HasAddressTaken);
+  }
+  return Error::success();
+}
+
+Error IdenticalCodeFolding::markFunctionsUnsafeToFold(BinaryContext &BC) {
+  if (!BC.isX86())
+    BC.outs() << "BOLT-WARNING: Safe ICF is only supported for x86\n";
+  constexpr uint64_t NumBits = (((uint64_t)1) << 32) / 8;
+  llvm::BitVector BitVector(NumBits);
+  processSymbolTable(BC, BitVector);
+  for (const auto &Sec : BC.sections()) {
+    if (!Sec.hasSectionRef() || !Sec.isRela())
+      continue;
+    const SectionRef &SecRef = Sec.getSectionRef();
+    section_iterator RelocatedSecIter = cantFail(SecRef.getRelocatedSection());
+    assert(RelocatedSecIter != SecRef.getObject()->section_end() &&
+           "Relocation section exists without corresponding relocated section");
+    const SectionRef &RelocatedSecRef = *RelocatedSecIter;
+    const StringRef RelocatedSectionName = cantFail(RelocatedSecRef.getName());
+    const bool SkipRelocs =
+        StringSwitch<bool>(RelocatedSectionName)
+            .Cases(".plt", ".got.plt", ".eh_frame", ".gcc_except_table",
+                   ".fini_array", ".init_array", true)
+            .Default(false);
+    if (!RelocatedSecRef.isData() || SkipRelocs)
+      continue;
+
+    Error ErrorStatus = processDataRelocations(BC, SecRef, BitVector,
----------------
ayermolo wrote:

This I don't follow.
Are you suggesting refactoring bool RewriteInstance::analyzeRelocation To use BinarySection to extract information from the relocation?

Right now it does it in that method:

```
  const bool IsPCRelative = Relocation::isPCRelative(RType);
  const uint64_t PCRelOffset = IsPCRelative && !IsAArch64 ? Rel.getOffset() : 0;
  bool SkipVerification = false;
  auto SymbolIter = Rel.getSymbol();
  if (SymbolIter == InputFile->symbol_end()) {
    SymbolAddress = ExtractedValue - Addend + PCRelOffset;
    MCSymbol *RelSymbol =
        BC->getOrCreateGlobalSymbol(SymbolAddress, "RELSYMat");
    SymbolName = std::string(RelSymbol->getName());
    IsSectionRelocation = false;
  } else {
    const SymbolRef &Symbol = *SymbolIter;
    SymbolName = std::string(cantFail(Symbol.getName()));
    SymbolAddress = cantFail(Symbol.getAddress());
    SkipVerification = (cantFail(Symbol.getType()) == SymbolRef::ST_Other);
    // Section symbols are marked as ST_Debug.
    IsSectionRelocation = (cantFail(Symbol.getType()) == SymbolRef::ST_Debug);
    // Check for PLT entry registered with symbol name
    if (!SymbolAddress && !IsWeakReference(Symbol) &&
        (IsAArch64 || BC->isRISCV())) {
      const BinaryData *BD = BC->getPLTBinaryDataByName(SymbolName);
      SymbolAddress = BD ? BD->getAddress() : 0;
    }
  }
```

https://github.com/llvm/llvm-project/pull/116275


More information about the llvm-commits mailing list