[llvm] [BOLT] Make instruction size a first-class annotation (PR #72167)

Maksim Panchenko via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 13 14:18:11 PST 2023


https://github.com/maksfb created https://github.com/llvm/llvm-project/pull/72167

When NOP instructions are used to reserve space in the code, e.g. for patching, it becomes critical to preserve their original size while emitting the code. On x86, we rely on "Size" annotation for NOP instructions size, as the original instruction size is lost in the disassembly/assembly process.

This change makes instruction size a first-class annotation and is affectively NFCI. A follow-up diff will use the annotation for code emission.

>From 261f891102657f6eb5f4cf718a6bc0f8de3d31d4 Mon Sep 17 00:00:00 2001
From: Maksim Panchenko <maks at fb.com>
Date: Mon, 13 Nov 2023 12:31:07 -0800
Subject: [PATCH] [BOLT] Make instruction size a first-class annotation

When NOP instructions are used to reserve space in the code, e.g. for
patching, it becomes critical to preserve their original size while
emitting the code. On x86, we rely on "Size" annotation for NOP
instructions size, as the original instruction size is lost in the
disassembly/assembly process.

This change makes instruction size a first-class annotation and is
affectively NFCI. A follow-up diff will use the annotation for code
emission.
---
 bolt/include/bolt/Core/BinaryContext.h |  4 ++--
 bolt/include/bolt/Core/MCPlus.h        |  1 +
 bolt/include/bolt/Core/MCPlusBuilder.h |  6 ++++++
 bolt/lib/Core/BinaryContext.cpp        |  2 ++
 bolt/lib/Core/BinaryFunction.cpp       | 11 ++++++-----
 bolt/lib/Core/MCPlusBuilder.cpp        | 11 +++++++++++
 bolt/lib/Profile/DataReader.cpp        |  3 ++-
 bolt/lib/Rewrite/RewriteInstance.cpp   |  1 -
 8 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 1f27133644d4abd..a4e84cb93c093dc 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -1239,8 +1239,8 @@ class BinaryContext {
   uint64_t
   computeInstructionSize(const MCInst &Inst,
                          const MCCodeEmitter *Emitter = nullptr) const {
-    if (auto Size = MIB->getAnnotationWithDefault<uint32_t>(Inst, "Size"))
-      return Size;
+    if (std::optional<uint32_t> Size = MIB->getSize(Inst))
+      return *Size;
 
     if (!Emitter)
       Emitter = this->MCE.get();
diff --git a/bolt/include/bolt/Core/MCPlus.h b/bolt/include/bolt/Core/MCPlus.h
index f6ffd33513dd258..b6a9e73f2347e71 100644
--- a/bolt/include/bolt/Core/MCPlus.h
+++ b/bolt/include/bolt/Core/MCPlus.h
@@ -72,6 +72,7 @@ class MCAnnotation {
     kConditionalTailCall, /// CTC.
     kOffset,              /// Offset in the function.
     kLabel,               /// MCSymbol pointing to this instruction.
+    kSize,                /// Size of the instruction.
     kGeneric              /// First generic annotation.
   };
 
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index 41a5dd8399dfc2b..687b49a3cbda43a 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -1179,6 +1179,12 @@ class MCPlusBuilder {
   /// is emitted to MCStreamer.
   bool setLabel(MCInst &Inst, MCSymbol *Label) const;
 
+  /// Get instruction size specified via annotation.
+  std::optional<uint32_t> getSize(const MCInst &Inst) const;
+
+  /// Set instruction size.
+  void setSize(MCInst &Inst, uint32_t Size) const;
+
   /// Return MCSymbol that represents a target of this instruction at a given
   /// operand number \p OpNum. If there's no symbol associated with
   /// the operand - return nullptr.
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index 00725a2ff2225a1..fd4d09964b725e1 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -1897,6 +1897,8 @@ void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
   }
   if (std::optional<uint32_t> Offset = MIB->getOffset(Instruction))
     OS << " # Offset: " << *Offset;
+  if (std::optional<uint32_t> Size = MIB->getSize(Instruction))
+    OS << " # Size: " << *Size;
   if (MCSymbol *Label = MIB->getLabel(Instruction))
     OS << " # Label: " << *Label;
 
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 17c2c0ca408d99b..9bbdd0deaff2d3a 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1380,7 +1380,7 @@ bool BinaryFunction::disassemble() {
       // NOTE: disassembly loses the correct size information for noops on x86.
       //       E.g. nopw 0x0(%rax,%rax,1) is 9 bytes, but re-encoded it's only
       //       5 bytes. Preserve the size info using annotations.
-      MIB->addAnnotation(Instruction, "Size", static_cast<uint32_t>(Size));
+      MIB->setSize(Instruction, Size);
     }
 
     addInstruction(Offset, std::move(Instruction));
@@ -4353,10 +4353,11 @@ MCInst *BinaryFunction::getInstructionAtOffset(uint64_t Offset) {
     }
 
     if (MCInst *LastInstr = BB->getLastNonPseudoInstr()) {
-      const uint32_t Size =
-          BC.MIB->getAnnotationWithDefault<uint32_t>(*LastInstr, "Size");
-      if (BB->getEndOffset() - Offset == Size)
-        return LastInstr;
+      if (std::optional<uint32_t> Size = BC.MIB->getSize(*LastInstr)) {
+        if (BB->getEndOffset() - Offset == Size) {
+          return LastInstr;
+        }
+      }
     }
 
     return nullptr;
diff --git a/bolt/lib/Core/MCPlusBuilder.cpp b/bolt/lib/Core/MCPlusBuilder.cpp
index 8c8ebe7d9830d26..44e5f88d8950fc3 100644
--- a/bolt/lib/Core/MCPlusBuilder.cpp
+++ b/bolt/lib/Core/MCPlusBuilder.cpp
@@ -279,6 +279,17 @@ bool MCPlusBuilder::setLabel(MCInst &Inst, MCSymbol *Label) const {
   return true;
 }
 
+std::optional<uint32_t> MCPlusBuilder::getSize(const MCInst &Inst) const {
+  if (std::optional<int64_t> Value =
+          getAnnotationOpValue(Inst, MCAnnotation::kSize))
+    return static_cast<uint32_t>(*Value);
+  return std::nullopt;
+}
+
+void MCPlusBuilder::setSize(MCInst &Inst, uint32_t Size) const {
+  setAnnotationOpValue(Inst, MCAnnotation::kSize, Size);
+}
+
 bool MCPlusBuilder::hasAnnotation(const MCInst &Inst, unsigned Index) const {
   return (bool)getAnnotationOpValue(Inst, Index);
 }
diff --git a/bolt/lib/Profile/DataReader.cpp b/bolt/lib/Profile/DataReader.cpp
index 64873e0de4bab55..3d4db6feb51a72e 100644
--- a/bolt/lib/Profile/DataReader.cpp
+++ b/bolt/lib/Profile/DataReader.cpp
@@ -698,7 +698,8 @@ bool DataReader::recordBranch(BinaryFunction &BF, uint64_t From, uint64_t To,
       if (!BC.MIB->isNoop(Instr))
         break;
 
-      Offset += BC.MIB->getAnnotationWithDefault<uint32_t>(Instr, "Size");
+      if (std::optional<uint32_t> Size = BC.MIB->getSize(Instr))
+        Offset += *Size;
     }
 
     if (To == Offset)
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index af100447786bb00..ce3402c5827ae6d 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -3212,7 +3212,6 @@ void RewriteInstance::buildFunctionsCFG() {
   // Create annotation indices to allow lock-free execution
   BC->MIB->getOrCreateAnnotationIndex("JTIndexReg");
   BC->MIB->getOrCreateAnnotationIndex("NOP");
-  BC->MIB->getOrCreateAnnotationIndex("Size");
 
   ParallelUtilities::WorkFuncWithAllocTy WorkFun =
       [&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId) {



More information about the llvm-commits mailing list