[llvm] [BOLT] Fix incorrect basic block output addresses (PR #70000)

Job Noorman via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 24 00:13:22 PDT 2023


================
@@ -26,37 +48,67 @@ void AddressMap::emit(MCStreamer &Streamer, BinaryContext &BC) {
       if (!BB.getLabel()->isDefined())
         continue;
 
-      emitLabel(Streamer, BFAddress + BB.getInputAddressRange().first,
-                BB.getLabel());
+      Streamer.switchSection(BC.getDataSection(LabelSectionName));
+      emitLabel(Streamer, BB.getLabel());
 
       if (!BB.hasLocSyms())
         continue;
 
+      Streamer.switchSection(BC.getDataSection(AddressSectionName));
       for (auto [Offset, Symbol] : BB.getLocSyms())
-        emitLabel(Streamer, BFAddress + Offset, Symbol);
+        emitAddress(Streamer, BFAddress + Offset, Symbol);
     }
   }
 }
 
-AddressMap AddressMap::parse(StringRef Buffer, const BinaryContext &BC) {
-  const auto EntrySize = 2 * BC.AsmInfo->getCodePointerSize();
-  assert(Buffer.size() % EntrySize == 0 && "Unexpected address map size");
+std::optional<AddressMap> AddressMap::parse(BinaryContext &BC) {
+  auto AddressMapSection = BC.getUniqueSectionByName(AddressSectionName);
+  auto LabelMapSection = BC.getUniqueSectionByName(LabelSectionName);
 
-  DataExtractor DE(Buffer, BC.AsmInfo->isLittleEndian(),
-                   BC.AsmInfo->getCodePointerSize());
-  DataExtractor::Cursor Cursor(0);
+  if (!AddressMapSection && !LabelMapSection)
+    return std::nullopt;
 
   AddressMap Parsed;
-  Parsed.Map.reserve(Buffer.size() / EntrySize);
 
-  while (Cursor && !DE.eof(Cursor)) {
-    const auto Input = DE.getAddress(Cursor);
-    const auto Output = DE.getAddress(Cursor);
-    if (!Parsed.Map.count(Input))
-      Parsed.Map.insert({Input, Output});
+  const size_t EntrySize = 2 * BC.AsmInfo->getCodePointerSize();
+  auto parseSection =
+      [&](BinarySection &Section,
+          function_ref<void(uint64_t, uint64_t)> InsertCallback) {
+        StringRef Buffer = Section.getOutputContents();
+        assert(Buffer.size() % EntrySize == 0 && "Unexpected address map size");
+
+        DataExtractor DE(Buffer, BC.AsmInfo->isLittleEndian(),
+                         BC.AsmInfo->getCodePointerSize());
+        DataExtractor::Cursor Cursor(0);
+
+        while (Cursor && !DE.eof(Cursor)) {
+          const uint64_t Input = DE.getAddress(Cursor);
+          const uint64_t Output = DE.getAddress(Cursor);
+          InsertCallback(Input, Output);
+        }
+
+        assert(Cursor && "Error reading address map section");
+        BC.deregisterSection(Section);
+      };
+
+  if (AddressMapSection) {
+    Parsed.Address2AddressMap.reserve(AddressMapSection->getOutputSize() /
+                                      EntrySize);
+    parseSection(*AddressMapSection, [&](uint64_t Input, uint64_t Output) {
+      if (!Parsed.Address2AddressMap.count(Input))
+        Parsed.Address2AddressMap.insert({Input, Output});
+    });
+  }
+
+  if (LabelMapSection) {
+    Parsed.Label2AddrMap.reserve(LabelMapSection->getOutputSize() / EntrySize);
+    parseSection(*LabelMapSection, [&](uint64_t Input, uint64_t Output) {
+      assert(!Parsed.Label2AddrMap.count(reinterpret_cast<MCSymbol *>(Input)));
----------------
mtvec wrote:

The full pointer-int round-trip is: `const MCSymbol *` -> `uint64_t` -> `MCSymbol *`.

I believe this might technically be UB since `reinterpret_cast` does not allow to cast-away constness ([link](https://en.cppreference.com/w/cpp/language/reinterpret_cast)):

> Only the following conversions can be done with `reinterpret_cast`, except when such conversions would cast away constness or volatility. 

Since we store the labels as `const MCSymbol *` in `Label2AddrMap` anyway, I would cast to a `const` pointer here just to be safe.

https://github.com/llvm/llvm-project/pull/70000


More information about the llvm-commits mailing list