[llvm] [BOLT] Introduce helpers to match `MCInst`s one at a time (NFC) (PR #138883)

Anatoly Trosinenko via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 24 03:53:50 PDT 2025


https://github.com/atrosinenko updated https://github.com/llvm/llvm-project/pull/138883

>From a1e344f9b537b272ff2b776a6d0d5add441efe19 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Wed, 7 May 2025 16:42:00 +0300
Subject: [PATCH 1/4] [BOLT] Introduce helpers to match `MCInst`s one at a time
 (NFC)

Introduce matchInst helper function to capture and/or match the operands
of MCInst. Unlike the existing `MCPlusBuilder::MCInstMatcher` machinery,
matchInst is intended for the use cases when precise control over the
instruction order is required. For example, when validating PtrAuth
hardening, all registers are usually considered unsafe after a function
call, even though callee-saved registers should preserve their old
values *under normal operation*.
---
 bolt/include/bolt/Core/MCInstUtils.h          | 128 ++++++++++++++++++
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   |  90 +++++-------
 2 files changed, 162 insertions(+), 56 deletions(-)

diff --git a/bolt/include/bolt/Core/MCInstUtils.h b/bolt/include/bolt/Core/MCInstUtils.h
index 2e93dfaf4c275..40731a4fed701 100644
--- a/bolt/include/bolt/Core/MCInstUtils.h
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -149,6 +149,134 @@ static inline raw_ostream &operator<<(raw_ostream &OS,
   return Ref.print(OS);
 }
 
+/// Instruction-matching helpers operating on a single instruction at a time.
+///
+/// Unlike MCPlusBuilder::MCInstMatcher, this matchInst() function focuses on
+/// the cases where a precise control over the instruction order is important:
+///
+///     // Bring the short names into the local scope:
+///     using namespace MCInstMatcher;
+///     // Declare the registers to capture:
+///     Reg Xn, Xm;
+///     // Capture the 0th and 1st operands, match the 2nd operand against the
+///     // just captured Xm register, match the 3rd operand against literal 0:
+///     if (!matchInst(MaybeAdd, AArch64::ADDXrs, Xm, Xn, Xm, Imm(0))
+///       return AArch64::NoRegister;
+///     // Match the 0th operand against Xm:
+///     if (!matchInst(MaybeBr, AArch64::BR, Xm))
+///       return AArch64::NoRegister;
+///     // Return the matched register:
+///     return Xm.get();
+namespace MCInstMatcher {
+
+// The base class to match an operand of type T.
+//
+// The subclasses of OpMatcher are intended to be allocated on the stack and
+// to only be used by passing them to matchInst() and by calling their get()
+// function, thus the peculiar `mutable` specifiers: to make the calling code
+// compact and readable, the templated matchInst() function has to accept both
+// long-lived Imm/Reg wrappers declared as local variables (intended to capture
+// the first operand's value and match the subsequent operands, whether inside
+// a single instruction or across multiple instructions), as well as temporary
+// wrappers around literal values to match, f.e. Imm(42) or Reg(AArch64::XZR).
+template <typename T> class OpMatcher {
+  mutable std::optional<T> Value;
+  mutable std::optional<T> SavedValue;
+
+  // Remember/restore the last Value - to be called by matchInst.
+  void remember() const { SavedValue = Value; }
+  void restore() const { Value = SavedValue; }
+
+  template <class... OpMatchers>
+  friend bool matchInst(const MCInst &, unsigned, const OpMatchers &...);
+
+protected:
+  OpMatcher(std::optional<T> ValueToMatch) : Value(ValueToMatch) {}
+
+  bool matchValue(T OpValue) const {
+    // Check that OpValue does not contradict the existing Value.
+    bool MatchResult = !Value || *Value == OpValue;
+    // If MatchResult is false, all matchers will be reset before returning from
+    // matchInst, including this one, thus no need to assign conditionally.
+    Value = OpValue;
+
+    return MatchResult;
+  }
+
+public:
+  /// Returns the captured value.
+  T get() const {
+    assert(Value.has_value());
+    return *Value;
+  }
+};
+
+class Reg : public OpMatcher<MCPhysReg> {
+  bool matches(const MCOperand &Op) const {
+    if (!Op.isReg())
+      return false;
+
+    return matchValue(Op.getReg());
+  }
+
+  template <class... OpMatchers>
+  friend bool matchInst(const MCInst &, unsigned, const OpMatchers &...);
+
+public:
+  Reg(std::optional<MCPhysReg> RegToMatch = std::nullopt)
+      : OpMatcher<MCPhysReg>(RegToMatch) {}
+};
+
+class Imm : public OpMatcher<int64_t> {
+  bool matches(const MCOperand &Op) const {
+    if (!Op.isImm())
+      return false;
+
+    return matchValue(Op.getImm());
+  }
+
+  template <class... OpMatchers>
+  friend bool matchInst(const MCInst &, unsigned, const OpMatchers &...);
+
+public:
+  Imm(std::optional<int64_t> ImmToMatch = std::nullopt)
+      : OpMatcher<int64_t>(ImmToMatch) {}
+};
+
+/// Tries to match Inst and updates Ops on success.
+///
+/// If Inst has the specified Opcode and its operand list prefix matches Ops,
+/// this function returns true and updates Ops, otherwise false is returned and
+/// values of Ops are kept as before matchInst was called.
+///
+/// Please note that while Ops are technically passed by a const reference to
+/// make invocations like `matchInst(MI, Opcode, Imm(42))` possible, all their
+/// fields are marked mutable.
+template <class... OpMatchers>
+bool matchInst(const MCInst &Inst, unsigned Opcode, const OpMatchers &...Ops) {
+  if (Inst.getOpcode() != Opcode)
+    return false;
+  assert(sizeof...(Ops) <= Inst.getNumOperands() &&
+         "Too many operands are matched for the Opcode");
+
+  // Ask each matcher to remember its current value in case of rollback.
+  (Ops.remember(), ...);
+
+  // Check if all matchers match the corresponding operands.
+  auto It = Inst.begin();
+  auto AllMatched = (Ops.matches(*(It++)) && ... && true);
+
+  // If match failed, restore the original captured values.
+  if (!AllMatched) {
+    (Ops.restore(), ...);
+    return false;
+  }
+
+  return true;
+}
+
+} // namespace MCInstMatcher
+
 } // namespace bolt
 } // namespace llvm
 
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 72f95cea6fa1d..c1052ad5c0ba9 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -19,6 +19,7 @@
 #include "Utils/AArch64BaseInfo.h"
 #include "bolt/Core/BinaryBasicBlock.h"
 #include "bolt/Core/BinaryFunction.h"
+#include "bolt/Core/MCInstUtils.h"
 #include "bolt/Core/MCPlusBuilder.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCContext.h"
@@ -389,81 +390,58 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
 
     // Iterate over the instructions of BB in reverse order, matching opcodes
     // and operands.
-    MCPhysReg TestedReg = 0;
-    MCPhysReg ScratchReg = 0;
+
     auto It = BB.end();
-    auto StepAndGetOpcode = [&It, &BB]() -> int {
-      if (It == BB.begin())
-        return -1;
-      --It;
-      return It->getOpcode();
+    auto StepBack = [&]() {
+      while (It != BB.begin()) {
+        --It;
+        if (!isCFI(*It))
+          return true;
+      }
+      return false;
     };
-
-    switch (StepAndGetOpcode()) {
-    default:
-      // Not matched the branch instruction.
+    // Step to the last non-CFI instruction.
+    if (!StepBack())
       return std::nullopt;
-    case AArch64::Bcc:
-      // Bcc EQ, .Lon_success
-      if (It->getOperand(0).getImm() != AArch64CC::EQ)
-        return std::nullopt;
-      // Not checking .Lon_success (see above).
 
-      // SUBSXrs XZR, TestedReg, ScratchReg, 0 (used by "CMP reg, reg" alias)
-      if (StepAndGetOpcode() != AArch64::SUBSXrs ||
-          It->getOperand(0).getReg() != AArch64::XZR ||
-          It->getOperand(3).getImm() != 0)
+    using namespace llvm::bolt::MCInstMatcher;
+    Reg TestedReg;
+    Reg ScratchReg;
+
+    if (matchInst(*It, AArch64::Bcc, Imm(AArch64CC::EQ) /*, .Lon_success*/)) {
+      if (!StepBack() || !matchInst(*It, AArch64::SUBSXrs, Reg(AArch64::XZR),
+                                    TestedReg, ScratchReg, Imm(0)))
         return std::nullopt;
-      TestedReg = It->getOperand(1).getReg();
-      ScratchReg = It->getOperand(2).getReg();
 
       // Either XPAC(I|D) ScratchReg, ScratchReg
       // or     XPACLRI
-      switch (StepAndGetOpcode()) {
-      default:
+      if (!StepBack())
         return std::nullopt;
-      case AArch64::XPACLRI:
+      if (matchInst(*It, AArch64::XPACLRI)) {
         // No operands to check, but using XPACLRI forces TestedReg to be X30.
-        if (TestedReg != AArch64::LR)
-          return std::nullopt;
-        break;
-      case AArch64::XPACI:
-      case AArch64::XPACD:
-        if (It->getOperand(0).getReg() != ScratchReg ||
-            It->getOperand(1).getReg() != ScratchReg)
+        if (TestedReg.get() != AArch64::LR)
           return std::nullopt;
-        break;
+      } else if (!matchInst(*It, AArch64::XPACI, ScratchReg, ScratchReg) &&
+                 !matchInst(*It, AArch64::XPACD, ScratchReg, ScratchReg)) {
+        return std::nullopt;
       }
 
-      // ORRXrs ScratchReg, XZR, TestedReg, 0 (used by "MOV reg, reg" alias)
-      if (StepAndGetOpcode() != AArch64::ORRXrs)
+      if (!StepBack() || !matchInst(*It, AArch64::ORRXrs, ScratchReg,
+                                    Reg(AArch64::XZR), TestedReg, Imm(0)))
         return std::nullopt;
-      if (It->getOperand(0).getReg() != ScratchReg ||
-          It->getOperand(1).getReg() != AArch64::XZR ||
-          It->getOperand(2).getReg() != TestedReg ||
-          It->getOperand(3).getImm() != 0)
-        return std::nullopt;
-
-      return std::make_pair(TestedReg, &*It);
 
-    case AArch64::TBZX:
-      // TBZX ScratchReg, 62, .Lon_success
-      ScratchReg = It->getOperand(0).getReg();
-      if (It->getOperand(1).getImm() != 62)
-        return std::nullopt;
-      // Not checking .Lon_success (see above).
+      return std::make_pair(TestedReg.get(), &*It);
+    }
 
-      // EORXrs ScratchReg, TestedReg, TestedReg, 1
-      if (StepAndGetOpcode() != AArch64::EORXrs)
-        return std::nullopt;
-      TestedReg = It->getOperand(1).getReg();
-      if (It->getOperand(0).getReg() != ScratchReg ||
-          It->getOperand(2).getReg() != TestedReg ||
-          It->getOperand(3).getImm() != 1)
+    if (matchInst(*It, AArch64::TBZX, ScratchReg, Imm(62) /*, .Lon_success*/)) {
+      if (!StepBack() || !matchInst(*It, AArch64::EORXrs, Reg(ScratchReg),
+                                    TestedReg, TestedReg, Imm(1)))
         return std::nullopt;
 
-      return std::make_pair(TestedReg, &*It);
+      return std::make_pair(TestedReg.get(), &*It);
     }
+
+    return std::nullopt;
   }
 
   std::optional<MCPhysReg> getAuthCheckedReg(const MCInst &Inst,

>From 30b6b6076b787353b62a4499e5676e8cc283a25e Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Tue, 23 Sep 2025 22:56:44 +0300
Subject: [PATCH 2/4] Improve the description; use MCPlus::getNumPrimeOperands

---
 bolt/include/bolt/Core/MCInstUtils.h          | 48 +++++++++++++++++--
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   |  2 +-
 2 files changed, 44 insertions(+), 6 deletions(-)

diff --git a/bolt/include/bolt/Core/MCInstUtils.h b/bolt/include/bolt/Core/MCInstUtils.h
index 40731a4fed701..21e6489057d01 100644
--- a/bolt/include/bolt/Core/MCInstUtils.h
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -10,6 +10,7 @@
 #define BOLT_CORE_MCINSTUTILS_H
 
 #include "bolt/Core/BinaryBasicBlock.h"
+#include "bolt/Core/MCPlus.h"
 #include <map>
 #include <variant>
 
@@ -151,11 +152,44 @@ static inline raw_ostream &operator<<(raw_ostream &OS,
 
 /// Instruction-matching helpers operating on a single instruction at a time.
 ///
+/// The idea is to make low-level instruction matching as readable as possible.
+/// The classes contained in this namespace are intended to be used as a
+/// domain-specific language to match MCInst with the particular opcode and
+/// operands.
+///
+/// The goals of this DSL include
+/// * matching a single instruction against a template consisting of the
+///   particular target-specific opcode and a pattern of operands
+/// * matching operands against the known values (such as 42, AArch64::X1 or
+///   "the value of --brk-operand=N command line argument")
+/// * capturing operands of an instruction ("whatever is the destination
+///   register of AArch64::ADDXri instruction, store it to Xd variable to be
+///   queried later")
+/// * expressing repeated operands of a single matched instruction (such as
+///   "ADDXri Xd, Xd, 42, 0" for an arbitrary register Xm) as well as across
+///   multiple calls to matchInst(), which is naturally achieved by combining
+///   capturing operands and matching against the known values
+/// * matching multi-instruction code patterns by sequentially calling
+///   matchInst() while passing around already matched operands
+///
+/// The non-goals (compared to MCPlusBuilder::MCInstMatcher) include
+/// * matching an arbitrary tree of instructions in a single matchInst() call
+/// * encapsulation of target-specific knowledge ("match an increment of Xm
+///   by 42")
+///
 /// Unlike MCPlusBuilder::MCInstMatcher, this matchInst() function focuses on
-/// the cases where a precise control over the instruction order is important:
+/// the cases where a precise control over the instruction order is important.
+/// For example, one has to match two particular instructions against the
+/// following pattern (for two different registers Xm and Xn)
+///
+///     ADDXrs Xm, Xn, Xm, #0
+///     BR     Xm
+///
+/// and return the register holding the branch target. Assuming the instructions
+/// are available as MaybeAdd and MaybeBr, the following code can be used:
 ///
 ///     // Bring the short names into the local scope:
-///     using namespace MCInstMatcher;
+///     using namespace LowLevelInstMatcherDSL;
 ///     // Declare the registers to capture:
 ///     Reg Xn, Xm;
 ///     // Capture the 0th and 1st operands, match the 2nd operand against the
@@ -165,9 +199,13 @@ static inline raw_ostream &operator<<(raw_ostream &OS,
 ///     // Match the 0th operand against Xm:
 ///     if (!matchInst(MaybeBr, AArch64::BR, Xm))
 ///       return AArch64::NoRegister;
+///     // Manually check that Xm and Xn did not match the same register.
+///     if (Xm.get() == Xn.get())
+///       return AArch64::NoRegister;
 ///     // Return the matched register:
 ///     return Xm.get();
-namespace MCInstMatcher {
+///
+namespace LowLevelInstMatcherDSL {
 
 // The base class to match an operand of type T.
 //
@@ -256,7 +294,7 @@ template <class... OpMatchers>
 bool matchInst(const MCInst &Inst, unsigned Opcode, const OpMatchers &...Ops) {
   if (Inst.getOpcode() != Opcode)
     return false;
-  assert(sizeof...(Ops) <= Inst.getNumOperands() &&
+  assert(sizeof...(Ops) <= MCPlus::getNumPrimeOperands(Inst) &&
          "Too many operands are matched for the Opcode");
 
   // Ask each matcher to remember its current value in case of rollback.
@@ -275,7 +313,7 @@ bool matchInst(const MCInst &Inst, unsigned Opcode, const OpMatchers &...Ops) {
   return true;
 }
 
-} // namespace MCInstMatcher
+} // namespace LowLevelInstMatcherDSL
 
 } // namespace bolt
 } // namespace llvm
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index c1052ad5c0ba9..962068a35f3bd 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -404,7 +404,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     if (!StepBack())
       return std::nullopt;
 
-    using namespace llvm::bolt::MCInstMatcher;
+    using namespace llvm::bolt::LowLevelInstMatcherDSL;
     Reg TestedReg;
     Reg ScratchReg;
 

>From 81f801e8618995b549b5c02cfee4fc2a4d7cfa7f Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Wed, 24 Sep 2025 13:14:54 +0300
Subject: [PATCH 3/4] Polish the description of LowLevelInstMatcherDSL

---
 bolt/include/bolt/Core/MCInstUtils.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/bolt/include/bolt/Core/MCInstUtils.h b/bolt/include/bolt/Core/MCInstUtils.h
index 21e6489057d01..2436e1359f40e 100644
--- a/bolt/include/bolt/Core/MCInstUtils.h
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -158,7 +158,7 @@ static inline raw_ostream &operator<<(raw_ostream &OS,
 /// operands.
 ///
 /// The goals of this DSL include
-/// * matching a single instruction against a template consisting of the
+/// * matching a single instruction against the template consisting of the
 ///   particular target-specific opcode and a pattern of operands
 /// * matching operands against the known values (such as 42, AArch64::X1 or
 ///   "the value of --brk-operand=N command line argument")
@@ -166,9 +166,9 @@ static inline raw_ostream &operator<<(raw_ostream &OS,
 ///   register of AArch64::ADDXri instruction, store it to Xd variable to be
 ///   queried later")
 /// * expressing repeated operands of a single matched instruction (such as
-///   "ADDXri Xd, Xd, 42, 0" for an arbitrary register Xm) as well as across
-///   multiple calls to matchInst(), which is naturally achieved by combining
-///   capturing operands and matching against the known values
+///   "ADDXri Xd, Xd, 42, 0" for an arbitrary register Xd) as well as across
+///   multiple calls to matchInst(), which is naturally achieved by sequentially
+///   capturing the operands and matching operands against the known values
 /// * matching multi-instruction code patterns by sequentially calling
 ///   matchInst() while passing around already matched operands
 ///
@@ -177,10 +177,10 @@ static inline raw_ostream &operator<<(raw_ostream &OS,
 /// * encapsulation of target-specific knowledge ("match an increment of Xm
 ///   by 42")
 ///
-/// Unlike MCPlusBuilder::MCInstMatcher, this matchInst() function focuses on
-/// the cases where a precise control over the instruction order is important.
-/// For example, one has to match two particular instructions against the
-/// following pattern (for two different registers Xm and Xn)
+/// Unlike MCPlusBuilder::MCInstMatcher, this DSL focuses on the use cases when
+/// the precise control over the instruction order is important. For example,
+/// let's consider a target-specific function that has to match two particular
+/// instructions against this pattern (for two different registers Xm and Xn)
 ///
 ///     ADDXrs Xm, Xn, Xm, #0
 ///     BR     Xm
@@ -199,7 +199,7 @@ static inline raw_ostream &operator<<(raw_ostream &OS,
 ///     // Match the 0th operand against Xm:
 ///     if (!matchInst(MaybeBr, AArch64::BR, Xm))
 ///       return AArch64::NoRegister;
-///     // Manually check that Xm and Xn did not match the same register.
+///     // Manually check that Xm and Xn did not match the same register:
 ///     if (Xm.get() == Xn.get())
 ///       return AArch64::NoRegister;
 ///     // Return the matched register:

>From 5a3b71a66060d3a7aaa432bf43f381fc344a0f91 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Wed, 24 Sep 2025 13:17:14 +0300
Subject: [PATCH 4/4] Misc trivial fixes for getAuthCheckedReg

---
 bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 962068a35f3bd..0ac7f6a7411f8 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -395,6 +395,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     auto StepBack = [&]() {
       while (It != BB.begin()) {
         --It;
+        // Skip any CFI instructions, but no other pseudos are expected here.
         if (!isCFI(*It))
           return true;
       }
@@ -434,8 +435,8 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     }
 
     if (matchInst(*It, AArch64::TBZX, ScratchReg, Imm(62) /*, .Lon_success*/)) {
-      if (!StepBack() || !matchInst(*It, AArch64::EORXrs, Reg(ScratchReg),
-                                    TestedReg, TestedReg, Imm(1)))
+      if (!StepBack() || !matchInst(*It, AArch64::EORXrs, ScratchReg, TestedReg,
+                                    TestedReg, Imm(1)))
         return std::nullopt;
 
       return std::make_pair(TestedReg.get(), &*It);



More information about the llvm-commits mailing list