[llvm] [BOLT] Fix incorrect basic block output addresses (PR #70000)
Job Noorman via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 24 00:13:22 PDT 2023
================
@@ -26,37 +48,67 @@ void AddressMap::emit(MCStreamer &Streamer, BinaryContext &BC) {
if (!BB.getLabel()->isDefined())
continue;
- emitLabel(Streamer, BFAddress + BB.getInputAddressRange().first,
- BB.getLabel());
+ Streamer.switchSection(BC.getDataSection(LabelSectionName));
+ emitLabel(Streamer, BB.getLabel());
if (!BB.hasLocSyms())
continue;
+ Streamer.switchSection(BC.getDataSection(AddressSectionName));
for (auto [Offset, Symbol] : BB.getLocSyms())
- emitLabel(Streamer, BFAddress + Offset, Symbol);
+ emitAddress(Streamer, BFAddress + Offset, Symbol);
}
}
}
-AddressMap AddressMap::parse(StringRef Buffer, const BinaryContext &BC) {
- const auto EntrySize = 2 * BC.AsmInfo->getCodePointerSize();
- assert(Buffer.size() % EntrySize == 0 && "Unexpected address map size");
+std::optional<AddressMap> AddressMap::parse(BinaryContext &BC) {
+ auto AddressMapSection = BC.getUniqueSectionByName(AddressSectionName);
+ auto LabelMapSection = BC.getUniqueSectionByName(LabelSectionName);
- DataExtractor DE(Buffer, BC.AsmInfo->isLittleEndian(),
- BC.AsmInfo->getCodePointerSize());
- DataExtractor::Cursor Cursor(0);
+ if (!AddressMapSection && !LabelMapSection)
+ return std::nullopt;
AddressMap Parsed;
- Parsed.Map.reserve(Buffer.size() / EntrySize);
- while (Cursor && !DE.eof(Cursor)) {
- const auto Input = DE.getAddress(Cursor);
- const auto Output = DE.getAddress(Cursor);
- if (!Parsed.Map.count(Input))
- Parsed.Map.insert({Input, Output});
+ const size_t EntrySize = 2 * BC.AsmInfo->getCodePointerSize();
+ auto parseSection =
+ [&](BinarySection &Section,
+ function_ref<void(uint64_t, uint64_t)> InsertCallback) {
+ StringRef Buffer = Section.getOutputContents();
+ assert(Buffer.size() % EntrySize == 0 && "Unexpected address map size");
+
+ DataExtractor DE(Buffer, BC.AsmInfo->isLittleEndian(),
+ BC.AsmInfo->getCodePointerSize());
+ DataExtractor::Cursor Cursor(0);
+
+ while (Cursor && !DE.eof(Cursor)) {
+ const uint64_t Input = DE.getAddress(Cursor);
+ const uint64_t Output = DE.getAddress(Cursor);
+ InsertCallback(Input, Output);
+ }
+
+ assert(Cursor && "Error reading address map section");
+ BC.deregisterSection(Section);
+ };
+
+ if (AddressMapSection) {
+ Parsed.Address2AddressMap.reserve(AddressMapSection->getOutputSize() /
+ EntrySize);
+ parseSection(*AddressMapSection, [&](uint64_t Input, uint64_t Output) {
+ if (!Parsed.Address2AddressMap.count(Input))
+ Parsed.Address2AddressMap.insert({Input, Output});
+ });
+ }
+
+ if (LabelMapSection) {
+ Parsed.Label2AddrMap.reserve(LabelMapSection->getOutputSize() / EntrySize);
+ parseSection(*LabelMapSection, [&](uint64_t Input, uint64_t Output) {
+ assert(!Parsed.Label2AddrMap.count(reinterpret_cast<MCSymbol *>(Input)));
----------------
mtvec wrote:
The full pointer-int round-trip is: `const MCSymbol *` -> `uint64_t` -> `MCSymbol *`.
I believe this might technically be UB since `reinterpret_cast` does not allow to cast-away constness ([link](https://en.cppreference.com/w/cpp/language/reinterpret_cast)):
> Only the following conversions can be done with `reinterpret_cast`, except when such conversions would cast away constness or volatility.
Since we store the labels as `const MCSymbol *` in `Label2AddrMap` anyway, I would cast to a `const` pointer here just to be safe.
https://github.com/llvm/llvm-project/pull/70000
More information about the llvm-commits
mailing list