[clang] 3b3de48 - [BOLT] Add BB index to BAT (#86044)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Mar 22 06:07:22 PDT 2024
Author: Amir Ayupov
Date: 2024-03-22T06:07:17-07:00
New Revision: 3b3de48fd84b8269d5f45ee0a9dc6b7448368424
URL: https://github.com/llvm/llvm-project/commit/3b3de48fd84b8269d5f45ee0a9dc6b7448368424
DIFF: https://github.com/llvm/llvm-project/commit/3b3de48fd84b8269d5f45ee0a9dc6b7448368424.diff
LOG: [BOLT] Add BB index to BAT (#86044)
Added:
Modified:
bolt/docs/BAT.md
bolt/include/bolt/Profile/BoltAddressTranslation.h
bolt/lib/Profile/BoltAddressTranslation.cpp
bolt/test/X86/bolt-address-translation-yaml.test
bolt/test/X86/bolt-address-translation.test
clang/lib/Driver/ToolChains/Clang.cpp
clang/test/Driver/unsupported-option-gpu.c
lld/MachO/Driver.cpp
lld/MachO/InputSection.cpp
lld/MachO/InputSection.h
lld/MachO/ObjC.cpp
lld/MachO/SyntheticSections.cpp
Removed:
################################################################################
diff --git a/bolt/docs/BAT.md b/bolt/docs/BAT.md
index 186b0e5ea89d38..436593478a398e 100644
--- a/bolt/docs/BAT.md
+++ b/bolt/docs/BAT.md
@@ -90,11 +90,12 @@ current function.
### Address translation table
Delta encoding means that only the difference with the previous corresponding
entry is encoded. Input offsets implicitly start at zero.
-| Entry | Encoding | Description |
-| ------ | ------| ----------- |
-| `OutputOffset` | Continuous, Delta, ULEB128 | Function offset in output binary |
-| `InputOffset` | Optional, Delta, SLEB128 | Function offset in input binary with `BRANCHENTRY` LSB bit |
-| `BBHash` | Optional, 8b | Basic block entries only: basic block hash in input binary |
+| Entry | Encoding | Description | Branch/BB |
+| ------ | ------| ----------- | ------ |
+| `OutputOffset` | Continuous, Delta, ULEB128 | Function offset in output binary | Both |
+| `InputOffset` | Optional, Delta, SLEB128 | Function offset in input binary with `BRANCHENTRY` LSB bit | Both |
+| `BBHash` | Optional, 8b | Basic block hash in input binary | BB |
+| `BBIdx` | Optional, Delta, ULEB128 | Basic block index in input binary | BB |
`BRANCHENTRY` bit denotes whether a given offset pair is a control flow source
(branch or call instruction). If not set, it signifies a control flow target
diff --git a/bolt/include/bolt/Profile/BoltAddressTranslation.h b/bolt/include/bolt/Profile/BoltAddressTranslation.h
index 1f53f6d344ad74..eda2b318f0d0a3 100644
--- a/bolt/include/bolt/Profile/BoltAddressTranslation.h
+++ b/bolt/include/bolt/Profile/BoltAddressTranslation.h
@@ -122,6 +122,10 @@ class BoltAddressTranslation {
/// Returns BF hash by function output address (after BOLT).
size_t getBFHash(uint64_t OutputAddress) const;
+ /// Returns BB index by function output address (after BOLT) and basic block
+ /// input offset.
+ unsigned getBBIndex(uint64_t FuncOutputAddress, uint32_t BBInputOffset) const;
+
/// True if a given \p Address is a function with translation table entry.
bool isBATFunction(uint64_t Address) const { return Maps.count(Address); }
@@ -154,7 +158,8 @@ class BoltAddressTranslation {
std::map<uint64_t, MapTy> Maps;
- using BBHashMap = std::unordered_map<uint32_t, size_t>;
+ /// Map basic block input offset to a basic block index and hash pair.
+ using BBHashMap = std::unordered_map<uint32_t, std::pair<unsigned, size_t>>;
std::unordered_map<uint64_t, std::pair<size_t, BBHashMap>> FuncHashes;
/// Links outlined cold bocks to their original function
diff --git a/bolt/lib/Profile/BoltAddressTranslation.cpp b/bolt/lib/Profile/BoltAddressTranslation.cpp
index 1d61a1b735b403..8fe976cc00e53c 100644
--- a/bolt/lib/Profile/BoltAddressTranslation.cpp
+++ b/bolt/lib/Profile/BoltAddressTranslation.cpp
@@ -45,6 +45,8 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
LLVM_DEBUG(dbgs() << formatv(" Hash: {0:x}\n",
getBBHash(HotFuncAddress, BBInputOffset)));
(void)HotFuncAddress;
+ LLVM_DEBUG(dbgs() << formatv(" Index: {0}\n",
+ getBBIndex(HotFuncAddress, BBInputOffset)));
// In case of conflicts (same Key mapping to different Vals), the last
// update takes precedence. Of course it is not ideal to have conflicts and
// those happen when we have an empty BB that either contained only
@@ -217,6 +219,7 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
}
size_t Index = 0;
uint64_t InOffset = 0;
+ size_t PrevBBIndex = 0;
// Output and Input addresses and delta-encoded
for (std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
const uint64_t OutputAddress = KeyVal.first + Address;
@@ -226,11 +229,15 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
encodeSLEB128(KeyVal.second - InOffset, OS);
InOffset = KeyVal.second; // Keeping InOffset as if BRANCHENTRY is encoded
if ((InOffset & BRANCHENTRY) == 0) {
- // Basic block hash
- size_t BBHash = FuncHashPair.second[InOffset >> 1];
+ unsigned BBIndex;
+ size_t BBHash;
+ std::tie(BBIndex, BBHash) = FuncHashPair.second[InOffset >> 1];
OS.write(reinterpret_cast<char *>(&BBHash), 8);
- LLVM_DEBUG(dbgs() << formatv("{0:x} -> {1:x} {2:x}\n", KeyVal.first,
- InOffset >> 1, BBHash));
+ // Basic block index in the input binary
+ encodeULEB128(BBIndex - PrevBBIndex, OS);
+ PrevBBIndex = BBIndex;
+ LLVM_DEBUG(dbgs() << formatv("{0:x} -> {1:x} {2:x} {3}\n", KeyVal.first,
+ InOffset >> 1, BBHash, BBIndex));
}
}
}
@@ -316,6 +323,7 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
LLVM_DEBUG(dbgs() << "Parsing " << NumEntries << " entries for 0x"
<< Twine::utohexstr(Address) << "\n");
uint64_t InputOffset = 0;
+ size_t BBIndex = 0;
for (uint32_t J = 0; J < NumEntries; ++J) {
const uint64_t OutputDelta = DE.getULEB128(&Offset, &Err);
const uint64_t OutputAddress = PrevAddress + OutputDelta;
@@ -330,19 +338,25 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
}
Map.insert(std::pair<uint32_t, uint32_t>(OutputOffset, InputOffset));
size_t BBHash = 0;
+ size_t BBIndexDelta = 0;
const bool IsBranchEntry = InputOffset & BRANCHENTRY;
if (!IsBranchEntry) {
BBHash = DE.getU64(&Offset, &Err);
+ BBIndexDelta = DE.getULEB128(&Offset, &Err);
+ BBIndex += BBIndexDelta;
// Map basic block hash to hot fragment by input offset
- FuncHashes[HotAddress].second.emplace(InputOffset >> 1, BBHash);
+ FuncHashes[HotAddress].second.emplace(InputOffset >> 1,
+ std::pair(BBIndex, BBHash));
}
LLVM_DEBUG({
dbgs() << formatv(
"{0:x} -> {1:x} ({2}/{3}b -> {4}/{5}b), {6:x}", OutputOffset,
InputOffset, OutputDelta, getULEB128Size(OutputDelta), InputDelta,
(J < EqualElems) ? 0 : getSLEB128Size(InputDelta), OutputAddress);
- if (BBHash)
- dbgs() << formatv(" {0:x}", BBHash);
+ if (!IsBranchEntry) {
+ dbgs() << formatv(" {0:x} {1}/{2}b", BBHash, BBIndex,
+ getULEB128Size(BBIndexDelta));
+ }
dbgs() << '\n';
});
}
@@ -494,14 +508,19 @@ void BoltAddressTranslation::saveMetadata(BinaryContext &BC) {
FuncHashes[BF.getAddress()].first = BF.computeHash();
BF.computeBlockHashes();
for (const BinaryBasicBlock &BB : BF)
- FuncHashes[BF.getAddress()].second.emplace(BB.getInputOffset(),
- BB.getHash());
+ FuncHashes[BF.getAddress()].second.emplace(
+ BB.getInputOffset(), std::pair(BB.getIndex(), BB.getHash()));
}
}
+unsigned BoltAddressTranslation::getBBIndex(uint64_t FuncOutputAddress,
+ uint32_t BBInputOffset) const {
+ return FuncHashes.at(FuncOutputAddress).second.at(BBInputOffset).first;
+}
+
size_t BoltAddressTranslation::getBBHash(uint64_t FuncOutputAddress,
uint32_t BBInputOffset) const {
- return FuncHashes.at(FuncOutputAddress).second.at(BBInputOffset);
+ return FuncHashes.at(FuncOutputAddress).second.at(BBInputOffset).second;
}
size_t BoltAddressTranslation::getBFHash(uint64_t OutputAddress) const {
diff --git a/bolt/test/X86/bolt-address-translation-yaml.test b/bolt/test/X86/bolt-address-translation-yaml.test
index 25ff4e7fbfcc57..4516a662697acc 100644
--- a/bolt/test/X86/bolt-address-translation-yaml.test
+++ b/bolt/test/X86/bolt-address-translation-yaml.test
@@ -18,7 +18,7 @@ RUN: | FileCheck --check-prefix CHECK-BOLT-YAML %s
WRITE-BAT-CHECK: BOLT-INFO: Wrote 5 BAT maps
WRITE-BAT-CHECK: BOLT-INFO: Wrote 4 function and 22 basic block hashes
-WRITE-BAT-CHECK: BOLT-INFO: BAT section size (bytes): 344
+WRITE-BAT-CHECK: BOLT-INFO: BAT section size (bytes): 376
READ-BAT-CHECK-NOT: BOLT-ERROR: unable to save profile in YAML format for input file processed by BOLT
READ-BAT-CHECK: BOLT-INFO: Parsed 5 BAT entries
diff --git a/bolt/test/X86/bolt-address-translation.test b/bolt/test/X86/bolt-address-translation.test
index 4277b4e0d0fef0..5c1db89e3c6b25 100644
--- a/bolt/test/X86/bolt-address-translation.test
+++ b/bolt/test/X86/bolt-address-translation.test
@@ -37,7 +37,7 @@
# CHECK: BOLT: 3 out of 7 functions were overwritten.
# CHECK: BOLT-INFO: Wrote 6 BAT maps
# CHECK: BOLT-INFO: Wrote 3 function and 58 basic block hashes
-# CHECK: BOLT-INFO: BAT section size (bytes): 816
+# CHECK: BOLT-INFO: BAT section size (bytes): 920
#
# usqrt mappings (hot part). We match against any key (left side containing
# the bolted binary offsets) because BOLT may change where it puts instructions
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 86a287db72a4eb..bc9cc8ce6cf5a5 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -5863,8 +5863,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
} else if (Triple.getArch() == llvm::Triple::x86_64) {
Ok = llvm::is_contained({"small", "kernel", "medium", "large", "tiny"},
CM);
- } else if (Triple.isNVPTX() || Triple.isAMDGPU() || Triple.isSPIRV()) {
- // NVPTX/AMDGPU/SPIRV does not care about the code model and will accept
+ } else if (Triple.isNVPTX() || Triple.isAMDGPU()) {
+ // NVPTX/AMDGPU does not care about the code model and will accept
// whatever works for the host.
Ok = true;
} else if (Triple.isSPARC64()) {
diff --git a/clang/test/Driver/unsupported-option-gpu.c b/clang/test/Driver/unsupported-option-gpu.c
index 5618b2cba72e16..f23cb71ebfb08e 100644
--- a/clang/test/Driver/unsupported-option-gpu.c
+++ b/clang/test/Driver/unsupported-option-gpu.c
@@ -2,5 +2,4 @@
// DEFINE: %{check} = %clang -### --target=x86_64-linux-gnu -c -mcmodel=medium
// RUN: %{check} -x cuda %s --cuda-path=%S/Inputs/CUDA/usr/local/cuda --offload-arch=sm_60 --no-cuda-version-check -fbasic-block-sections=all
-// RUN: %{check} -x hip %s --offload=spirv64 -nogpulib -nogpuinc
// RUN: %{check} -x hip %s --rocm-path=%S/Inputs/rocm -nogpulib -nogpuinc
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 919a14b8bcf08b..36248925d65ad2 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -612,7 +612,7 @@ static void replaceCommonSymbols() {
if (!osec)
osec = ConcatOutputSection::getOrCreateForInput(isec);
isec->parent = osec;
- addInputSection(isec);
+ inputSections.push_back(isec);
// FIXME: CommonSymbol should store isReferencedDynamically, noDeadStrip
// and pass them on here.
@@ -1220,18 +1220,53 @@ static void createFiles(const InputArgList &args) {
static void gatherInputSections() {
TimeTraceScope timeScope("Gathering input sections");
+ int inputOrder = 0;
for (const InputFile *file : inputFiles) {
for (const Section *section : file->sections) {
// Compact unwind entries require special handling elsewhere. (In
// contrast, EH frames are handled like regular ConcatInputSections.)
if (section->name == section_names::compactUnwind)
continue;
- for (const Subsection &subsection : section->subsections)
- addInputSection(subsection.isec);
+ ConcatOutputSection *osec = nullptr;
+ for (const Subsection &subsection : section->subsections) {
+ if (auto *isec = dyn_cast<ConcatInputSection>(subsection.isec)) {
+ if (isec->isCoalescedWeak())
+ continue;
+ if (config->emitInitOffsets &&
+ sectionType(isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS) {
+ in.initOffsets->addInput(isec);
+ continue;
+ }
+ isec->outSecOff = inputOrder++;
+ if (!osec)
+ osec = ConcatOutputSection::getOrCreateForInput(isec);
+ isec->parent = osec;
+ inputSections.push_back(isec);
+ } else if (auto *isec =
+ dyn_cast<CStringInputSection>(subsection.isec)) {
+ if (isec->getName() == section_names::objcMethname) {
+ if (in.objcMethnameSection->inputOrder == UnspecifiedInputOrder)
+ in.objcMethnameSection->inputOrder = inputOrder++;
+ in.objcMethnameSection->addInput(isec);
+ } else {
+ if (in.cStringSection->inputOrder == UnspecifiedInputOrder)
+ in.cStringSection->inputOrder = inputOrder++;
+ in.cStringSection->addInput(isec);
+ }
+ } else if (auto *isec =
+ dyn_cast<WordLiteralInputSection>(subsection.isec)) {
+ if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder)
+ in.wordLiteralSection->inputOrder = inputOrder++;
+ in.wordLiteralSection->addInput(isec);
+ } else {
+ llvm_unreachable("unexpected input section kind");
+ }
+ }
}
if (!file->objCImageInfo.empty())
in.objCImageInfo->addFile(file);
}
+ assert(inputOrder <= UnspecifiedInputOrder);
}
static void foldIdenticalLiterals() {
@@ -1387,7 +1422,6 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
concatOutputSections.clear();
inputFiles.clear();
inputSections.clear();
- inputSectionsOrder = 0;
loadedArchives.clear();
loadedObjectFrameworks.clear();
missingAutolinkWarnings.clear();
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 22930d52dd1db2..8f5affb1dc21d8 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -37,44 +37,6 @@ static_assert(sizeof(void *) != 8 ||
"instances of it");
std::vector<ConcatInputSection *> macho::inputSections;
-int macho::inputSectionsOrder = 0;
-
-// Call this function to add a new InputSection and have it routed to the
-// appropriate container. Depending on its type and current config, it will
-// either be added to 'inputSections' vector or to a synthetic section.
-void lld::macho::addInputSection(InputSection *inputSection) {
- if (auto *isec = dyn_cast<ConcatInputSection>(inputSection)) {
- if (isec->isCoalescedWeak())
- return;
- if (config->emitInitOffsets &&
- sectionType(isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS) {
- in.initOffsets->addInput(isec);
- return;
- }
- isec->outSecOff = inputSectionsOrder++;
- auto *osec = ConcatOutputSection::getOrCreateForInput(isec);
- isec->parent = osec;
- inputSections.push_back(isec);
- } else if (auto *isec = dyn_cast<CStringInputSection>(inputSection)) {
- if (isec->getName() == section_names::objcMethname) {
- if (in.objcMethnameSection->inputOrder == UnspecifiedInputOrder)
- in.objcMethnameSection->inputOrder = inputSectionsOrder++;
- in.objcMethnameSection->addInput(isec);
- } else {
- if (in.cStringSection->inputOrder == UnspecifiedInputOrder)
- in.cStringSection->inputOrder = inputSectionsOrder++;
- in.cStringSection->addInput(isec);
- }
- } else if (auto *isec = dyn_cast<WordLiteralInputSection>(inputSection)) {
- if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder)
- in.wordLiteralSection->inputOrder = inputSectionsOrder++;
- in.wordLiteralSection->addInput(isec);
- } else {
- llvm_unreachable("unexpected input section kind");
- }
-
- assert(inputSectionsOrder <= UnspecifiedInputOrder);
-}
uint64_t InputSection::getFileSize() const {
return isZeroFill(getFlags()) ? 0 : getSize();
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index 694bdf734907ba..b25f0638f4c6cb 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -302,8 +302,6 @@ bool isEhFrameSection(const InputSection *);
bool isGccExceptTabSection(const InputSection *);
extern std::vector<ConcatInputSection *> inputSections;
-// This is used as a counter for specyfing input order for input sections
-extern int inputSectionsOrder;
namespace section_names {
@@ -371,7 +369,6 @@ constexpr const char addrSig[] = "__llvm_addrsig";
} // namespace section_names
-void addInputSection(InputSection *inputSection);
} // namespace macho
std::string toString(const macho::InputSection *);
diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 5902b82d30f556..40df2243b26f06 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -790,7 +790,7 @@ void ObjcCategoryMerger::emitAndLinkProtocolList(
infoCategoryWriter.catPtrListInfo.align);
listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
listSec->live = true;
- addInputSection(listSec);
+ allInputSections.push_back(listSec);
listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
@@ -848,7 +848,7 @@ void ObjcCategoryMerger::emitAndLinkPointerList(
infoCategoryWriter.catPtrListInfo.align);
listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
listSec->live = true;
- addInputSection(listSec);
+ allInputSections.push_back(listSec);
listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
@@ -889,7 +889,7 @@ ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCateogryName,
bodyData, infoCategoryWriter.catListInfo.align);
newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
newCatList->live = true;
- addInputSection(newCatList);
+ allInputSections.push_back(newCatList);
newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
@@ -927,7 +927,7 @@ Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name,
bodyData, infoCategoryWriter.catBodyInfo.align);
newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection;
newBodySec->live = true;
- addInputSection(newBodySec);
+ allInputSections.push_back(newBodySec);
std::string symName =
objc::symbol_names::category + baseClassName + "_$_(" + name + ")";
@@ -1132,7 +1132,7 @@ void ObjcCategoryMerger::generateCatListForNonErasedCategories(
infoCategoryWriter.catListInfo.align);
listSec->parent = infoCategoryWriter.catListInfo.outputSection;
listSec->live = true;
- addInputSection(listSec);
+ allInputSections.push_back(listSec);
std::string slotSymName = "<__objc_catlist slot for category ";
slotSymName += nonErasedCatBody->getName();
@@ -1221,11 +1221,9 @@ void ObjcCategoryMerger::doCleanup() { generatedSectionData.clear(); }
StringRef ObjcCategoryMerger::newStringData(const char *str) {
uint32_t len = strlen(str);
- uint32_t bufSize = len + 1;
- auto &data = newSectionData(bufSize);
+ auto &data = newSectionData(len + 1);
char *strData = reinterpret_cast<char *>(data.data());
- // Copy the string chars and null-terminator
- memcpy(strData, str, bufSize);
+ strncpy(strData, str, len);
return StringRef(strData, len);
}
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 1b3694528de1dd..7ee3261ce3075f 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -793,7 +793,7 @@ void StubHelperSection::setUp() {
in.imageLoaderCache->parent =
ConcatOutputSection::getOrCreateForInput(in.imageLoaderCache);
- addInputSection(in.imageLoaderCache);
+ inputSections.push_back(in.imageLoaderCache);
// Since this isn't in the symbol table or in any input file, the noDeadStrip
// argument doesn't matter.
dyldPrivate =
@@ -855,7 +855,7 @@ ConcatInputSection *ObjCSelRefsSection::makeSelRef(StringRef methname) {
/*addend=*/static_cast<int64_t>(methnameOffset),
/*referent=*/in.objcMethnameSection->isec});
objcSelref->parent = ConcatOutputSection::getOrCreateForInput(objcSelref);
- addInputSection(objcSelref);
+ inputSections.push_back(objcSelref);
objcSelref->isFinal = true;
methnameToSelref[CachedHashStringRef(methname)] = objcSelref;
return objcSelref;
More information about the cfe-commits
mailing list