[llvm-branch-commits] [lld] 9993071 - Handle overflow beyond the 127 common encodings limit
Greg McGary via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sat Dec 19 14:59:29 PST 2020
Author: Greg McGary
Date: 2020-12-19T14:54:37-08:00
New Revision: 99930719c66df9a8b67f3575d251b182c9cc8ee9
URL: https://github.com/llvm/llvm-project/commit/99930719c66df9a8b67f3575d251b182c9cc8ee9
DIFF: https://github.com/llvm/llvm-project/commit/99930719c66df9a8b67f3575d251b182c9cc8ee9.diff
LOG: Handle overflow beyond the 127 common encodings limit
The common encodings table holds only 127 entries. The encodings index for compact entries is 8 bits wide, and indexes 127..255 are stored locally to each second-level page. Prior to this diff, lld would `fatal()` if encodings overflowed the 127 limit.
This diff populates a per-second-level-page encodings table as needed. When the per-page encodings table hits its limit, we must terminate the page. If such early termination would consume fewer entries than a regular (non-compact) encoding page, then we prefer the regular format.
Caveat: one reason the common-encoding table might overflow is because of DWARF debug-info references, which are not yet implemented and will come with a later diff.
Differential Revision: https://reviews.llvm.org/D93267
Added:
Modified:
lld/MachO/UnwindInfoSection.cpp
lld/MachO/UnwindInfoSection.h
lld/test/MachO/tools/generate-cfi-funcs.py
lld/test/MachO/tools/validate-unwind-info.py
Removed:
################################################################################
diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index ed6cf050f576..acb4a9d69b79 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -25,6 +25,24 @@ using namespace llvm::MachO;
using namespace lld;
using namespace lld::macho;
+#define COMMON_ENCODINGS_MAX 127
+#define COMPACT_ENCODINGS_MAX 256
+
+#define SECOND_LEVEL_PAGE_BYTES 4096
+#define SECOND_LEVEL_PAGE_WORDS (SECOND_LEVEL_PAGE_BYTES / sizeof(uint32_t))
+#define REGULAR_SECOND_LEVEL_ENTRIES_MAX \
+ ((SECOND_LEVEL_PAGE_BYTES - \
+ sizeof(unwind_info_regular_second_level_page_header)) / \
+ sizeof(unwind_info_regular_second_level_entry))
+#define COMPRESSED_SECOND_LEVEL_ENTRIES_MAX \
+ ((SECOND_LEVEL_PAGE_BYTES - \
+ sizeof(unwind_info_compressed_second_level_page_header)) / \
+ sizeof(uint32_t))
+
+#define COMPRESSED_ENTRY_FUNC_OFFSET_BITS 24
+#define COMPRESSED_ENTRY_FUNC_OFFSET_MASK \
+ UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(~0)
+
// Compact Unwind format is a Mach-O evolution of DWARF Unwind that
// optimizes space and exception-time lookup. Most DWARF unwind
// entries can be replaced with Compact Unwind entries, but the ones
@@ -101,7 +119,7 @@ void UnwindInfoSection::finalize() {
// Rather than sort & fold the 32-byte entries directly, we create a
// vector of pointers to entries and sort & fold that instead.
cuPtrVector.reserve(cuCount);
- for (const auto &cuEntry : cuVector)
+ for (const CompactUnwindEntry64 &cuEntry : cuVector)
cuPtrVector.emplace_back(&cuEntry);
std::sort(cuPtrVector.begin(), cuPtrVector.end(),
[](const CompactUnwindEntry64 *a, const CompactUnwindEntry64 *b) {
@@ -129,13 +147,11 @@ void UnwindInfoSection::finalize() {
cuPtrVector.erase(foldWrite, cuPtrVector.end());
// Count frequencies of the folded encodings
- llvm::DenseMap<compact_unwind_encoding_t, size_t> encodingFrequencies;
+ EncodingMap encodingFrequencies;
for (auto cuPtrEntry : cuPtrVector)
encodingFrequencies[cuPtrEntry->encoding]++;
- if (encodingFrequencies.size() > UNWIND_INFO_COMMON_ENCODINGS_MAX)
- error("TODO(gkm): handle common encodings table overflow");
- // Make a table of encodings, sorted by descending frequency
+ // Make a vector of encodings, sorted by descending frequency
for (const auto &frequency : encodingFrequencies)
commonEncodings.emplace_back(frequency);
std::sort(commonEncodings.begin(), commonEncodings.end(),
@@ -148,37 +164,67 @@ void UnwindInfoSection::finalize() {
return a.second > b.second;
});
- // Split folded encodings into pages, limited by capacity of a page
- // and the 24-bit range of function offset
- //
- // Record the page splits as a vector of iterators on cuPtrVector
- // such that successive elements form a semi-open interval. E.g.,
- // page X's bounds are thus: [ pageBounds[X] .. pageBounds[X+1] )
- //
- // Note that pageBounds.size() is one greater than the number of
- // pages, and pageBounds.back() holds the sentinel cuPtrVector.cend()
- pageBounds.push_back(cuPtrVector.cbegin());
- // TODO(gkm): cut 1st page entries short to accommodate section headers ???
- CompactUnwindEntry64 cuEntryKey;
- for (size_t i = 0;;) {
- // Limit the search to entries that can fit within a 4 KiB page.
- const auto pageBegin = pageBounds[0] + i;
- const auto pageMax =
- pageBounds[0] +
- std::min(i + UNWIND_INFO_COMPRESSED_SECOND_LEVEL_ENTRIES_MAX,
- cuPtrVector.size());
- // Exclude entries with functionOffset that would overflow 24 bits
- cuEntryKey.functionAddress = (*pageBegin)->functionAddress +
- UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET_MASK;
- const auto pageBreak = std::lower_bound(
- pageBegin, pageMax, &cuEntryKey,
- [](const CompactUnwindEntry64 *a, const CompactUnwindEntry64 *b) {
- return a->functionAddress < b->functionAddress;
- });
- pageBounds.push_back(pageBreak);
- if (pageBreak == cuPtrVector.cend())
- break;
- i = pageBreak - cuPtrVector.cbegin();
+ // Truncate the vector to 127 elements.
+ // Common encoding indexes are limited to 0..126, while enconding
+ // indexes 127..255 are local to each second-level page
+ if (commonEncodings.size() > COMMON_ENCODINGS_MAX)
+ commonEncodings.resize(COMMON_ENCODINGS_MAX);
+
+ // Create a map from encoding to common-encoding-table index
+ for (size_t i = 0; i < commonEncodings.size(); i++)
+ commonEncodingIndexes[commonEncodings[i].first] = i;
+
+ // Split folded encodings into pages, where each page is limited by ...
+ // (a) 4 KiB capacity
+ // (b) 24-bit
diff erence between first & final function address
+ // (c) 8-bit compact-encoding-table index,
+ // for which 0..126 references the global common-encodings table,
+ // and 127..255 references a local per-second-level-page table.
+ // First we try the compact format and determine how many entries fit.
+ // If more entries fit in the regular format, we use that.
+ for (size_t i = 0; i < cuPtrVector.size();) {
+ secondLevelPages.emplace_back();
+ auto &page = secondLevelPages.back();
+ page.entryIndex = i;
+ uintptr_t functionAddressMax =
+ cuPtrVector[i]->functionAddress + COMPRESSED_ENTRY_FUNC_OFFSET_MASK;
+ size_t n = commonEncodings.size();
+ size_t wordsRemaining =
+ SECOND_LEVEL_PAGE_WORDS -
+ sizeof(unwind_info_compressed_second_level_page_header) /
+ sizeof(uint32_t);
+ while (wordsRemaining >= 1 && i < cuPtrVector.size()) {
+ const auto *cuPtr = cuPtrVector[i];
+ if (cuPtr->functionAddress >= functionAddressMax) {
+ break;
+ } else if (commonEncodingIndexes.count(cuPtr->encoding) ||
+ page.localEncodingIndexes.count(cuPtr->encoding)) {
+ i++;
+ wordsRemaining--;
+ } else if (wordsRemaining >= 2 && n < COMPACT_ENCODINGS_MAX) {
+ page.localEncodings.emplace_back(cuPtr->encoding);
+ page.localEncodingIndexes[cuPtr->encoding] = n++;
+ i++;
+ wordsRemaining -= 2;
+ } else {
+ break;
+ }
+ }
+ page.entryCount = i - page.entryIndex;
+
+ // If this is not the final page, see if it's possible to fit more
+ // entries by using the regular format. This can happen when there
+ // are many unique encodings, and we we saturated the local
+ // encoding table early.
+ if (i < cuPtrVector.size() &&
+ page.entryCount < REGULAR_SECOND_LEVEL_ENTRIES_MAX) {
+ page.kind = UNWIND_SECOND_LEVEL_REGULAR;
+ page.entryCount = std::min(REGULAR_SECOND_LEVEL_ENTRIES_MAX,
+ cuPtrVector.size() - page.entryIndex);
+ i = page.entryIndex + page.entryCount;
+ } else {
+ page.kind = UNWIND_SECOND_LEVEL_COMPRESSED;
+ }
}
// compute size of __TEXT,__unwind_info section
@@ -186,12 +232,12 @@ void UnwindInfoSection::finalize() {
sizeof(unwind_info_section_header) +
commonEncodings.size() * sizeof(uint32_t) +
personalities.size() * sizeof(uint32_t) +
- pageBounds.size() * sizeof(unwind_info_section_header_index_entry) +
+ // The extra second-level-page entry is for the sentinel
+ (secondLevelPages.size() + 1) *
+ sizeof(unwind_info_section_header_index_entry) +
lsdaEntries.size() * sizeof(unwind_info_section_header_lsda_index_entry);
- unwindInfoSize = level2PagesOffset +
- (pageBounds.size() - 1) *
- sizeof(unwind_info_compressed_second_level_page_header) +
- cuPtrVector.size() * sizeof(uint32_t);
+ unwindInfoSize =
+ level2PagesOffset + secondLevelPages.size() * SECOND_LEVEL_PAGE_BYTES;
}
// All inputs are relocated and output addresses are known, so write!
@@ -208,7 +254,7 @@ void UnwindInfoSection::writeTo(uint8_t *buf) const {
uip->personalityArrayCount = personalities.size();
uip->indexSectionOffset = uip->personalityArraySectionOffset +
(uip->personalityArrayCount * sizeof(uint32_t));
- uip->indexCount = pageBounds.size();
+ uip->indexCount = secondLevelPages.size() + 1;
// Common encodings
auto *i32p = reinterpret_cast<uint32_t *>(&uip[1]);
@@ -216,7 +262,7 @@ void UnwindInfoSection::writeTo(uint8_t *buf) const {
*i32p++ = encoding.first;
// Personalities
- for (const auto &personality : personalities)
+ for (const uint32_t &personality : personalities)
*i32p++ = personality;
// Level-1 index
@@ -225,16 +271,12 @@ void UnwindInfoSection::writeTo(uint8_t *buf) const {
uip->indexCount * sizeof(unwind_info_section_header_index_entry);
uint64_t l2PagesOffset = level2PagesOffset;
auto *iep = reinterpret_cast<unwind_info_section_header_index_entry *>(i32p);
- for (size_t i = 0; i < pageBounds.size() - 1; i++) {
- iep->functionOffset = (*pageBounds[i])->functionAddress;
+ for (const SecondLevelPage &page : secondLevelPages) {
+ iep->functionOffset = cuPtrVector[page.entryIndex]->functionAddress;
iep->secondLevelPagesSectionOffset = l2PagesOffset;
iep->lsdaIndexArraySectionOffset = lsdaOffset;
iep++;
- // TODO(gkm): pad to 4 KiB page boundary ???
- size_t entryCount = pageBounds[i + 1] - pageBounds[i];
- uint64_t pageSize = sizeof(unwind_info_section_header_index_entry) +
- entryCount * sizeof(uint32_t);
- l2PagesOffset += pageSize;
+ l2PagesOffset += SECOND_LEVEL_PAGE_BYTES;
}
// Level-1 sentinel
const CompactUnwindEntry64 &cuEnd = cuVector.back();
@@ -246,41 +288,52 @@ void UnwindInfoSection::writeTo(uint8_t *buf) const {
// LSDAs
auto *lep =
reinterpret_cast<unwind_info_section_header_lsda_index_entry *>(iep);
- for (const auto &lsda : lsdaEntries) {
+ for (const unwind_info_section_header_lsda_index_entry &lsda : lsdaEntries) {
lep->functionOffset = lsda.functionOffset;
lep->lsdaOffset = lsda.lsdaOffset;
}
- // create map from encoding to common-encoding-table index compact
- // encoding entries use 7 bits to index the common-encoding table
- size_t i = 0;
- llvm::DenseMap<compact_unwind_encoding_t, size_t> commonEncodingIndexes;
- for (const auto &encoding : commonEncodings)
- commonEncodingIndexes[encoding.first] = i++;
-
// Level-2 pages
- auto *p2p =
- reinterpret_cast<unwind_info_compressed_second_level_page_header *>(lep);
- for (size_t i = 0; i < pageBounds.size() - 1; i++) {
- p2p->kind = UNWIND_SECOND_LEVEL_COMPRESSED;
- p2p->entryPageOffset =
- sizeof(unwind_info_compressed_second_level_page_header);
- p2p->entryCount = pageBounds[i + 1] - pageBounds[i];
- p2p->encodingsPageOffset =
- p2p->entryPageOffset + p2p->entryCount * sizeof(uint32_t);
- p2p->encodingsCount = 0;
- auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
- auto cuPtrVectorIt = pageBounds[i];
- uintptr_t functionAddressBase = (*cuPtrVectorIt)->functionAddress;
- while (cuPtrVectorIt < pageBounds[i + 1]) {
- const CompactUnwindEntry64 *cuep = *cuPtrVectorIt++;
- size_t cueIndex = commonEncodingIndexes.lookup(cuep->encoding);
- *ep++ = ((cueIndex << UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET_BITS) |
- (cuep->functionAddress - functionAddressBase));
+ auto *pp = reinterpret_cast<uint32_t *>(lep);
+ for (const SecondLevelPage &page : secondLevelPages) {
+ if (page.kind == UNWIND_SECOND_LEVEL_COMPRESSED) {
+ uintptr_t functionAddressBase =
+ cuPtrVector[page.entryIndex]->functionAddress;
+ auto *p2p =
+ reinterpret_cast<unwind_info_compressed_second_level_page_header *>(
+ pp);
+ p2p->kind = page.kind;
+ p2p->entryPageOffset =
+ sizeof(unwind_info_compressed_second_level_page_header);
+ p2p->entryCount = page.entryCount;
+ p2p->encodingsPageOffset =
+ p2p->entryPageOffset + p2p->entryCount * sizeof(uint32_t);
+ p2p->encodingsCount = page.localEncodings.size();
+ auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
+ for (size_t i = 0; i < page.entryCount; i++) {
+ const CompactUnwindEntry64 *cuep = cuPtrVector[page.entryIndex + i];
+ auto it = commonEncodingIndexes.find(cuep->encoding);
+ if (it == commonEncodingIndexes.end())
+ it = page.localEncodingIndexes.find(cuep->encoding);
+ *ep++ = (it->second << COMPRESSED_ENTRY_FUNC_OFFSET_BITS) |
+ (cuep->functionAddress - functionAddressBase);
+ }
+ memcpy(ep, page.localEncodings.data(),
+ page.localEncodings.size() * sizeof(uint32_t));
+ } else {
+ auto *p2p =
+ reinterpret_cast<unwind_info_regular_second_level_page_header *>(pp);
+ p2p->kind = page.kind;
+ p2p->entryPageOffset =
+ sizeof(unwind_info_regular_second_level_page_header);
+ p2p->entryCount = page.entryCount;
+ auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
+ for (size_t i = 0; i < page.entryCount; i++) {
+ const CompactUnwindEntry64 *cuep = cuPtrVector[page.entryIndex + i];
+ *ep++ = cuep->functionAddress;
+ *ep++ = cuep->encoding;
+ }
}
- p2p =
- reinterpret_cast<unwind_info_compressed_second_level_page_header *>(ep);
+ pp += SECOND_LEVEL_PAGE_WORDS;
}
- assert(getSize() ==
- static_cast<size_t>((reinterpret_cast<uint8_t *>(p2p) - buf)));
}
diff --git a/lld/MachO/UnwindInfoSection.h b/lld/MachO/UnwindInfoSection.h
index 61e639dc94b3..2285cf930d83 100644
--- a/lld/MachO/UnwindInfoSection.h
+++ b/lld/MachO/UnwindInfoSection.h
@@ -49,35 +49,30 @@ class UnwindInfoSection : public SyntheticSection {
compactUnwindSection = cuSection;
}
+ using EncodingMap = llvm::DenseMap<compact_unwind_encoding_t, size_t>;
+
+ struct SecondLevelPage {
+ uint32_t kind;
+ size_t entryIndex;
+ size_t entryCount;
+ size_t byteCount;
+ std::vector<compact_unwind_encoding_t> localEncodings;
+ EncodingMap localEncodingIndexes;
+ };
+
private:
std::vector<std::pair<compact_unwind_encoding_t, size_t>> commonEncodings;
+ EncodingMap commonEncodingIndexes;
std::vector<uint32_t> personalities;
std::vector<unwind_info_section_header_lsda_index_entry> lsdaEntries;
std::vector<CompactUnwindEntry64> cuVector;
std::vector<const CompactUnwindEntry64 *> cuPtrVector;
- std::vector<std::vector<const CompactUnwindEntry64 *>::const_iterator>
- pageBounds;
+ std::vector<SecondLevelPage> secondLevelPages;
MergedOutputSection *compactUnwindSection = nullptr;
uint64_t level2PagesOffset = 0;
uint64_t unwindInfoSize = 0;
};
-#define UNWIND_INFO_COMMON_ENCODINGS_MAX 127
-
-#define UNWIND_INFO_SECOND_LEVEL_PAGE_SIZE 4096
-#define UNWIND_INFO_REGULAR_SECOND_LEVEL_ENTRIES_MAX \
- ((UNWIND_INFO_SECOND_LEVEL_PAGE_SIZE - \
- sizeof(unwind_info_regular_second_level_page_header)) / \
- sizeof(unwind_info_regular_second_level_entry))
-#define UNWIND_INFO_COMPRESSED_SECOND_LEVEL_ENTRIES_MAX \
- ((UNWIND_INFO_SECOND_LEVEL_PAGE_SIZE - \
- sizeof(unwind_info_compressed_second_level_page_header)) / \
- sizeof(uint32_t))
-
-#define UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET_BITS 24
-#define UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET_MASK \
- UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(~0)
-
} // namespace macho
} // namespace lld
diff --git a/lld/test/MachO/tools/generate-cfi-funcs.py b/lld/test/MachO/tools/generate-cfi-funcs.py
index d9c6ecd0fb71..a91eab3eeac6 100755
--- a/lld/test/MachO/tools/generate-cfi-funcs.py
+++ b/lld/test/MachO/tools/generate-cfi-funcs.py
@@ -24,7 +24,7 @@ def print_function(name):
have_lsda = (random.random() < lsda_odds)
frame_size = random.randint(4, 64) * 16
frame_offset = -random.randint(0, (frame_size/16 - 4)) * 16
- reg_count = random.randint(0, 4)
+ reg_count = random.randint(0, 5)
reg_combo = random.randint(0, factorial(reg_count) - 1)
regs_saved = saved_regs_combined[reg_count][reg_combo]
global func_size_low, func_size_high
diff --git a/lld/test/MachO/tools/validate-unwind-info.py b/lld/test/MachO/tools/validate-unwind-info.py
index ee4cca4698ef..f3619b4baac6 100755
--- a/lld/test/MachO/tools/validate-unwind-info.py
+++ b/lld/test/MachO/tools/validate-unwind-info.py
@@ -45,7 +45,7 @@ def main():
sys.exit("no program symbols found in input")
program_common_encodings = (
- re.findall(r"^\s+encoding\[\d+\]: 0x(%s+)$" % hex,
+ re.findall(r"^\s+encoding\[(?:\d|\d\d|1[01]\d|12[0-6])\]: 0x(%s+)$" % hex,
objdump_string, re.MULTILINE))
if not program_common_encodings:
sys.exit("no common encodings found in input")
@@ -53,7 +53,7 @@ def main():
program_encodings_map = {program_symbols_map[address]:encoding
for address, encoding in
re.findall(r"^\s+\[\d+\]: function offset=0x(%s+), " % hex +
- r"encoding\[\d+\]=0x(%s+)$" % hex,
+ r"encoding(?:\[\d+\])?=0x(%s+)$" % hex,
objdump_string, re.MULTILINE)}
if not object_encodings_map:
sys.exit("no program encodings found in input")
@@ -73,8 +73,8 @@ def main():
if program_encodings_map != object_encodings_map:
if args.debug:
- pprint("program encodings map:\n" + program_encodings_map)
- pprint("object encodings map:\n" + object_encodings_map)
+ pprint("program encodings map:\n" + str(program_encodings_map))
+ pprint("object encodings map:\n" + str(object_encodings_map))
sys.exit("encoding maps
diff er")
# Count frequency of object-file folded encodings
@@ -86,11 +86,12 @@ def main():
sorted(encoding_frequency_map,
key=lambda x: (encoding_frequency_map.get(x), x),
reverse=True)]
+ del encoding_frequencies[127:]
if program_common_encodings != encoding_frequencies:
if args.debug:
- pprint("program common encodings:\n" + program_common_encodings)
- pprint("object encoding frequencies:\n" + encoding_frequencies)
+ pprint("program common encodings:\n" + str(program_common_encodings))
+ pprint("object encoding frequencies:\n" + str(encoding_frequencies))
sys.exit("encoding frequencies
diff er")
More information about the llvm-branch-commits
mailing list