[llvm] [BOLT][binary-analysis] Add initial pac-ret gadget scanner (PR #122304)
Anatoly Trosinenko via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 10 05:02:02 PST 2025
================
@@ -0,0 +1,207 @@
+//===- bolt/Passes/NonPacProtectedRetAnalysis.h -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BOLT_PASSES_NONPACPROTECTEDRETANALYSIS_H
+#define BOLT_PASSES_NONPACPROTECTEDRETANALYSIS_H
+
+#include "bolt/Core/BinaryContext.h"
+#include "bolt/Core/BinaryFunction.h"
+#include "bolt/Passes/BinaryPasses.h"
+#include "llvm/ADT/SmallSet.h"
+
+namespace llvm {
+namespace bolt {
+
+/// @brief MCInstReference represents a reference to an MCInst as stored either
+/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock
+/// (after a CFG is created). It aims to store the necessary information to be
+/// able to find the specific MCInst in either the BinaryFunction or
+/// BinaryBasicBlock data structures later, so that e.g. the InputAddress of
+/// the corresponding instruction can be computed.
+
+struct MCInstInBBReference {
+ BinaryBasicBlock *BB;
+ int64_t BBIndex;
+ MCInstInBBReference(BinaryBasicBlock *BB, int64_t BBIndex)
+ : BB(BB), BBIndex(BBIndex) {}
+ MCInstInBBReference() : BB(nullptr), BBIndex(0) {}
+ static MCInstInBBReference get(const MCInst *Inst, BinaryFunction &BF) {
+ for (BinaryBasicBlock &BB : BF)
+ for (size_t I = 0; I < BB.size(); ++I)
+ if (Inst == &(BB.getInstructionAtIndex(I)))
+ return MCInstInBBReference(&BB, I);
+ return {};
+ }
+ bool operator==(const MCInstInBBReference &RHS) const {
+ return BB == RHS.BB && BBIndex == RHS.BBIndex;
+ }
+ bool operator<(const MCInstInBBReference &RHS) const {
+ if (BB != RHS.BB)
+ return BB < RHS.BB;
+ return BBIndex < RHS.BBIndex;
+ }
+ operator MCInst &() const {
+ assert(BB != nullptr);
+ return BB->getInstructionAtIndex(BBIndex);
+ }
+ uint64_t getAddress() const {
+ // 4 bytes per instruction on AArch64;
+ return BB->getFunction()->getAddress() + BB->getOffset() + BBIndex * 4;
+ }
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const MCInstInBBReference &);
+
+struct MCInstInBFReference {
+ BinaryFunction *BF;
+ uint32_t Offset;
+ MCInstInBFReference(BinaryFunction *BF, uint32_t Offset)
+ : BF(BF), Offset(Offset) {}
+ MCInstInBFReference() : BF(nullptr) {}
+ bool operator==(const MCInstInBFReference &RHS) const {
+ return BF == RHS.BF && Offset == RHS.Offset;
+ }
+ bool operator<(const MCInstInBFReference &RHS) const {
+ if (BF != RHS.BF)
+ return BF < RHS.BF;
+ return Offset < RHS.Offset;
+ }
+ operator MCInst &() const {
+ assert(BF != nullptr);
+ return *(BF->getInstructionAtOffset(Offset));
+ }
+
+ uint64_t getOffset() const { return Offset; }
+
+ uint64_t getAddress() const { return BF->getAddress() + getOffset(); }
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const MCInstInBFReference &);
+
+struct MCInstReference {
+ enum StoredIn { _BinaryFunction, _BinaryBasicBlock };
+ StoredIn CurrentLocation;
+ union U {
+ MCInstInBBReference BBRef;
+ MCInstInBFReference BFRef;
+ U(MCInstInBBReference BBRef) : BBRef(BBRef) {}
+ U(MCInstInBFReference BFRef) : BFRef(BFRef) {}
+ } U;
+ MCInstReference(MCInstInBBReference BBRef)
+ : CurrentLocation(_BinaryBasicBlock), U(BBRef) {}
+ MCInstReference(MCInstInBFReference BFRef)
+ : CurrentLocation(_BinaryFunction), U(BFRef) {}
+ MCInstReference(BinaryBasicBlock *BB, int64_t BBIndex)
+ : MCInstReference(MCInstInBBReference(BB, BBIndex)) {}
+ MCInstReference(BinaryFunction *BF, uint32_t Offset)
+ : MCInstReference(MCInstInBFReference(BF, Offset)) {}
+
+ bool operator<(const MCInstReference &RHS) const {
+ if (CurrentLocation != RHS.CurrentLocation)
+ return CurrentLocation < RHS.CurrentLocation;
+ switch (CurrentLocation) {
+ case _BinaryBasicBlock:
+ return U.BBRef < RHS.U.BBRef;
+ case _BinaryFunction:
+ return U.BFRef < RHS.U.BFRef;
+ }
+ llvm_unreachable("");
+ }
+
+ bool operator==(const MCInstReference &RHS) const {
+ if (CurrentLocation != RHS.CurrentLocation)
+ return false;
+ switch (CurrentLocation) {
+ case _BinaryBasicBlock:
+ return U.BBRef == RHS.U.BBRef;
+ case _BinaryFunction:
+ return U.BFRef == RHS.U.BFRef;
+ }
+ llvm_unreachable("");
+ }
+
+ operator MCInst &() const {
+ switch (CurrentLocation) {
+ case _BinaryBasicBlock:
+ return U.BBRef;
+ case _BinaryFunction:
+ return U.BFRef;
+ }
+ llvm_unreachable("");
+ }
+
+ uint64_t getAddress() const {
+ switch (CurrentLocation) {
+ case _BinaryBasicBlock:
+ return U.BBRef.getAddress();
+ case _BinaryFunction:
+ return U.BFRef.getAddress();
+ }
+ llvm_unreachable("");
+ }
+
+ BinaryFunction *getFunction() const {
+ switch (CurrentLocation) {
+ case _BinaryFunction:
+ return U.BFRef.BF;
+ case _BinaryBasicBlock:
+ return U.BBRef.BB->getFunction();
+ }
+ llvm_unreachable("");
+ }
+
+ BinaryBasicBlock *getBasicBlock() const {
+ switch (CurrentLocation) {
+ case _BinaryFunction:
+ return nullptr;
+ case _BinaryBasicBlock:
+ return U.BBRef.BB;
+ }
+ llvm_unreachable("");
+ }
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const MCInstReference &);
+
+struct NonPacProtectedRetGadget {
+ MCInstReference RetInst;
+ std::vector<MCInstReference> OverwritingRetRegInst;
+ bool operator==(const NonPacProtectedRetGadget &RHS) const {
+ return RetInst == RHS.RetInst &&
+ OverwritingRetRegInst == RHS.OverwritingRetRegInst;
+ }
+ NonPacProtectedRetGadget(
+ MCInstReference RetInst,
+ const std::vector<MCInstReference> &OverwritingRetRegInst)
+ : RetInst(RetInst), OverwritingRetRegInst(OverwritingRetRegInst) {}
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const NonPacProtectedRetGadget &NPPRG);
+class PacRetAnalysis;
+
+class NonPacProtectedRetAnalysis : public BinaryFunctionPass {
+ void runOnFunction(BinaryFunction &Function,
+ MCPlusBuilder::AllocatorIdTy AllocatorId);
+ SmallSet<MCPhysReg, 1>
+ computeDfState(PacRetAnalysis &PRA, BinaryFunction &BF,
+ MCPlusBuilder::AllocatorIdTy AllocatorId);
+ unsigned GadgetAnnotationIndex;
+
+public:
+ explicit NonPacProtectedRetAnalysis() : BinaryFunctionPass(false) {}
+
+ const char *getName() const override { return "non-pac-protected-rets"; }
+
+ /// Pass entry point
+ Error runOnFunctions(BinaryContext &BC) override;
+};
+
+} // namespace bolt
+} // namespace llvm
+
+#endif
----------------
atrosinenko wrote:
[nit] Github interface shows a marker here about an absent newline at end of file. I wonder what is expected according to the code style (same in `NonPacProtectedRetAnalysis.cpp`).
https://github.com/llvm/llvm-project/pull/122304
More information about the llvm-commits
mailing list