[llvm] [Instrumentor] Add Instrumentor pass (PR #138958)

Shilei Tian via llvm-commits llvm-commits at lists.llvm.org
Wed May 21 10:26:09 PDT 2025


================
@@ -0,0 +1,739 @@
+//===- Transforms/IPO/Instrumentor.h --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A highly configurable instrumentation pass.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_INSTRUMENTOR_H
+#define LLVM_TRANSFORMS_IPO_INSTRUMENTOR_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/EnumeratedArray.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/StringSaver.h"
+#include "llvm/Transforms/Utils/Instrumentation.h"
+
+#include <bitset>
+#include <cstdint>
+#include <functional>
+#include <string>
+#include <tuple>
+
+namespace llvm {
+namespace instrumentor {
+
+struct InstrumentationConfig;
+struct InstrumentationOpportunity;
+
+/// Bundles an IRBuilder with the state shared while emitting instrumentation
+/// code for a module: commonly used types and constants, a pool of reusable
+/// temporary allocas, a set of instructions to erase on destruction, and a
+/// record of the epoche in which each new instruction was inserted.
+struct InstrumentorIRBuilderTy {
+  /// Create the builder state for module \p M, keeping a reference to \p FAM.
+  /// Every instruction inserted through IRB is recorded in NewInsts together
+  /// with the value Epoche has at insertion time.
+  InstrumentorIRBuilderTy(Module &M, FunctionAnalysisManager &FAM)
+      : M(M), Ctx(M.getContext()), FAM(FAM),
+        IRB(Ctx, ConstantFolder(),
+            IRBuilderCallbackInserter(
+                [&](Instruction *I) { NewInsts[I] = Epoche; })) {}
+
+  /// Erase all instructions queued via eraseLater(). Uses of a non-void
+  /// instruction are replaced with poison before the instruction is erased.
+  /// NOTE(review): the AllocaListTy objects allocated with `new` in
+  /// getAlloca() are not freed here -- confirm they are released elsewhere.
+  ~InstrumentorIRBuilderTy() {
+    for (auto *I : ToBeErased) {
+      if (!I->getType()->isVoidTy())
+        I->replaceAllUsesWith(PoisonValue::get(I->getType()));
+      I->eraseFromParent();
+    }
+  }
+
+  /// Get a temporary alloca to communicate (large) values with the runtime.
+  ///
+  /// Pooled allocas are looked up by (\p Fn, alloc size of \p Ty). The first
+  /// pooled alloca is taken; if \p MatchType is set, only one whose allocated
+  /// type equals \p Ty qualifies. If none qualifies, a new alloca of type
+  /// \p Ty is created at the beginning of the entry block of \p Fn, in the
+  /// alloca address space of the data layout. The returned alloca is
+  /// remembered in UsedAllocas until returnAllocas() is called.
+  AllocaInst *getAlloca(Function *Fn, Type *Ty, bool MatchType = false) {
+    const DataLayout &DL = Fn->getDataLayout();
+    auto *&AllocaList = AllocaMap[{Fn, DL.getTypeAllocSize(Ty)}];
+    // The list for this key is created lazily on first use.
+    if (!AllocaList)
+      AllocaList = new AllocaListTy;
+    AllocaInst *AI = nullptr;
+    for (auto *&ListAI : *AllocaList) {
+      if (MatchType && ListAI->getAllocatedType() != Ty)
+        continue;
+      AI = ListAI;
+      // Overwrite the taken slot with the last element; the pop_back() below
+      // then drops the now duplicated last entry.
+      ListAI = *AllocaList->rbegin();
+      break;
+    }
+    if (AI)
+      AllocaList->pop_back();
+    else
+      AI = new AllocaInst(Ty, DL.getAllocaAddrSpace(), "",
+                          Fn->getEntryBlock().begin());
+    // Remember which list the alloca belongs to so it can be handed back.
+    UsedAllocas[AI] = AllocaList;
+    return AI;
+  }
+
+  /// Return the temporary allocas handed out by getAlloca() to the lists they
+  /// belong to, making them available for reuse, and forget them as "used".
+  void returnAllocas() {
+    for (auto [AI, List] : UsedAllocas)
+      List->push_back(AI);
+    UsedAllocas.clear();
+  }
+
+  /// Commonly used values for IR inspection and creation.
+  ///{
+
+  /// The module passed to the constructor.
+  Module &M;
+
+  /// The underlying LLVM context.
+  LLVMContext &Ctx;
+
+  /// The data layout of the module M.
+  const DataLayout &DL = M.getDataLayout();
+
+  Type *VoidTy = Type::getVoidTy(Ctx);
+  /// The integer type the data layout of M reports as pointer-sized.
+  Type *IntptrTy = M.getDataLayout().getIntPtrType(Ctx);
+  PointerType *PtrTy = PointerType::getUnqual(Ctx);
+  IntegerType *Int8Ty = Type::getInt8Ty(Ctx);
+  IntegerType *Int32Ty = Type::getInt32Ty(Ctx);
+  IntegerType *Int64Ty = Type::getInt64Ty(Ctx);
+  /// The null value of type PtrTy.
+  Constant *NullPtrVal = Constant::getNullValue(PtrTy);
+  ///}
+
+  /// Mapping to remember temporary allocas for reuse. The key is the function
+  /// and the alloc size of the requested type, see getAlloca().
+  /// NOTE(review): the size is stored as `unsigned` while getAlloca() fills it
+  /// from DL.getTypeAllocSize(Ty) -- confirm the implicit conversion is
+  /// intended.
+  using AllocaListTy = SmallVector<AllocaInst *>;
+  DenseMap<std::pair<Function *, unsigned>, AllocaListTy *> AllocaMap;
+  /// Allocas handed out by getAlloca() since the last returnAllocas(), each
+  /// mapped to the list it is pushed back to by returnAllocas().
+  DenseMap<AllocaInst *, SmallVector<AllocaInst *> *> UsedAllocas;
+
+  /// Queue \p I to be erased when this object is destroyed.
+  void eraseLater(Instruction *I) { ToBeErased.insert(I); }
+  /// Instructions queued by eraseLater(); processed in the destructor.
+  SmallPtrSet<Instruction *, 32> ToBeErased;
+
+  /// The function analysis manager passed to the constructor.
+  FunctionAnalysisManager &FAM;
+
+  /// The IR builder. Its inserter callback records every inserted instruction
+  /// in NewInsts.
+  IRBuilder<ConstantFolder, IRBuilderCallbackInserter> IRB;
+
+  /// Each instrumentation, i.a., of an instruction, is happening in a dedicated
+  /// epoche. The epoche allows to determine if instrumentation instructions
+  /// were already around, due to prior instrumentations, or have been
+  /// introduced to support the current instrumentation, i.a., compute
+  /// information about the current instruction.
+  unsigned Epoche = 0;
+
+  /// A mapping from instrumentation instructions to the epoche they have been
+  /// created.
+  DenseMap<Instruction *, unsigned> NewInsts;
+};
+
+/// Type of the callback stored in IRTArg::GetterCB. It receives a value, a
+/// type, the instrumentation configuration, and the instrumentor IR builder,
+/// and returns a value.
+/// NOTE(review): presumably it produces the runtime call argument of the given
+/// type for the given value -- the exact contract is not visible here.
+using GetterCallbackTy = std::function<Value *(
+    Value &, Type &, InstrumentationConfig &, InstrumentorIRBuilderTy &)>;
+/// Type of the callback stored in IRTArg::SetterCB. It receives two values,
+/// the instrumentation configuration, and the instrumentor IR builder, and
+/// returns a value.
+/// NOTE(review): presumably the two values are the original value and its
+/// replacement -- confirm against the users of SetterCB.
+using SetterCallbackTy = std::function<Value *(
+    Value &, Value &, InstrumentationConfig &, InstrumentorIRBuilderTy &)>;
+
+/// Description of a single argument: its type, name, textual description,
+/// flags, and the callbacks used to get and (optionally) set its value.
+/// NOTE(review): "IRT" presumably abbreviates "instrumentation runtime" --
+/// confirm.
+struct IRTArg {
+  /// Bit flags that can be combined in IRTArg::Flags.
+  enum IRArgFlagTy {
+    NONE = 0,
+    STRING = 1 << 0,
+    REPLACABLE = 1 << 1,
+    REPLACABLE_CUSTOM = 1 << 2,
+    POTENTIALLY_INDIRECT = 1 << 3,
+    INDIRECT_HAS_SIZE = 1 << 4,
+
+    /// Enumerator following the last flag. Its implicit value is
+    /// (1 << 4) + 1, so it is not a bit flag itself.
+    LAST,
+  };
+
+  /// Create an argument description. \p SetterCB defaults to an empty
+  /// callback, \p Enabled to true, and \p NoCache to false. \p Name and
+  /// \p Description are stored as StringRefs, i.e., the strings are not
+  /// copied.
+  IRTArg(Type *Ty, StringRef Name, StringRef Description, unsigned Flags,
+         GetterCallbackTy GetterCB, SetterCallbackTy SetterCB = nullptr,
+         bool Enabled = true, bool NoCache = false)
+      : Enabled(Enabled), Ty(Ty), Name(Name), Description(Description),
+        Flags(Flags), GetterCB(std::move(GetterCB)),
+        SetterCB(std::move(SetterCB)), NoCache(NoCache) {}
+
+  /// Whether this argument is enabled; set from the constructor.
+  bool Enabled;
+  /// The type of the argument.
+  Type *Ty;
+  /// The name of the argument (not owned).
+  StringRef Name;
+  /// A description of the argument (not owned).
+  StringRef Description;
+  /// A combination of IRArgFlagTy values.
+  unsigned Flags;
+  /// Callback to get the argument value, see GetterCallbackTy.
+  GetterCallbackTy GetterCB;
+  /// Callback to set the argument value, see SetterCallbackTy; may be empty.
+  SetterCallbackTy SetterCB;
+  /// NOTE(review): presumably disables caching of this argument's value in
+  /// InstrumentationCaches -- the use is not visible here.
+  bool NoCache;
+};
+
+/// Two caches that map an (unsigned, StringRef, StringRef) key to a Value,
+/// kept separately for direct and for indirect arguments (going by the member
+/// names).
+/// NOTE(review): the meaning of the three key components is not visible here;
+/// presumably they are an epoche, an opportunity name, and an argument name.
+struct InstrumentationCaches {
+  DenseMap<std::tuple<unsigned, StringRef, StringRef>, Value *> DirectArgCache;
+  DenseMap<std::tuple<unsigned, StringRef, StringRef>, Value *>
+      IndirectArgCache;
+};
+
+/// Description of a call for an instrumentation opportunity. It can create
+/// the LLVM function signature and the LLVM call; both are defined out of
+/// line.
+struct IRTCallDescription {
+  /// Create a call description for an instrumentation opportunity with the
+  /// optional return type \p RetTy.
+  /// NOTE(review): the parameter is named IConf although its type is
+  /// InstrumentationOpportunity; the corresponding member is named IO.
+  IRTCallDescription(InstrumentationOpportunity &IConf, Type *RetTy = nullptr);
+
+  /// Create the LLVM function type for the call. Defined out of line.
+  /// NOTE(review): \p ForceIndirection presumably forces arguments to be
+  /// passed indirectly -- confirm against the definition.
+  FunctionType *createLLVMSignature(InstrumentationConfig &IConf,
+                                    LLVMContext &Ctx, const DataLayout &DL,
+                                    bool ForceIndirection);
+  /// Create the LLVM call. Defined out of line. \p V is passed by reference
+  /// to a pointer, so the callee is able to rebind it.
+  CallInst *createLLVMCall(Value *&V, InstrumentationConfig &IConf,
+                           InstrumentorIRBuilderTy &IIRB, const DataLayout &DL,
+                           InstrumentationCaches &ICaches);
+
+  /// Return true if \p IRTA has the REPLACABLE or the REPLACABLE_CUSTOM flag
+  /// set.
+  bool isReplacable(IRTArg &IRTA) const {
+    return (IRTA.Flags & (IRTArg::REPLACABLE | IRTArg::REPLACABLE_CUSTOM));
+  }
+
+  /// Return true if \p IRTA has the POTENTIALLY_INDIRECT flag set, or if it
+  /// has the REPLACABLE flag set while NumReplaceableArgs is larger than one.
+  bool isPotentiallyIndirect(IRTArg &IRTA) const {
+    return ((IRTA.Flags & IRTArg::POTENTIALLY_INDIRECT) ||
+            ((IRTA.Flags & IRTArg::REPLACABLE) && NumReplaceableArgs > 1));
+  }
+
+  /// NOTE(review): these two flags and the counter below are not written in
+  /// the visible code; presumably they are computed by the constructor or by
+  /// createLLVMSignature().
+  bool RequiresIndirection = false;
+  bool MightRequireIndirection = false;
+  /// Consulted by isPotentiallyIndirect().
+  unsigned NumReplaceableArgs = 0;
+  /// The instrumentation opportunity this call description belongs to.
+  InstrumentationOpportunity &IO;
+  /// The return type of the call; may be null.
+  Type *RetTy = nullptr;
+};
+
+/// A location kind for instrumentation, plus an instruction opcode for the
+/// two instruction kinds. Kinds come in PRE/POST pairs for modules, globals,
+/// functions, basic blocks, and instructions; SPECIAL_VALUE has no PRE/POST
+/// variant.
+struct InstrumentationLocation {
+  /// The kinds of locations.
+  enum KindTy {
+    MODULE_PRE,
+    MODULE_POST,
+    GLOBAL_PRE,
+    GLOBAL_POST,
+    FUNCTION_PRE,
+    FUNCTION_POST,
+    BASIC_BLOCK_PRE,
+    BASIC_BLOCK_POST,
+    INSTRUCTION_PRE,
+    INSTRUCTION_POST,
+    SPECIAL_VALUE,
+    /// Alias of the last enumerator, i.e., it has the same value as
+    /// SPECIAL_VALUE.
+    Last = SPECIAL_VALUE,
+  };
+
+  /// Create a non-instruction location. Asserts that \p Kind is neither
+  /// INSTRUCTION_PRE nor INSTRUCTION_POST; use the opcode constructor for
+  /// those.
+  InstrumentationLocation(KindTy Kind) : Kind(Kind) {
+    assert(Kind != INSTRUCTION_PRE && Kind != INSTRUCTION_POST &&
+           "Opcode required!");
+  }
+
+  /// Create an instruction location for \p Opcode. The kind is
+  /// INSTRUCTION_PRE if \p IsPRE is true and INSTRUCTION_POST otherwise.
+  InstrumentationLocation(unsigned Opcode, bool IsPRE)
+      : Kind(IsPRE ? INSTRUCTION_PRE : INSTRUCTION_POST), Opcode(Opcode) {}
+
+  /// Return the kind of this location.
+  KindTy getKind() const { return Kind; }
+
+  /// Return the lower-case string name of \p Kind, e.g., "module_pre".
+  static StringRef getKindStr(KindTy Kind) {
+    switch (Kind) {
+    case MODULE_PRE:
+      return "module_pre";
+    case MODULE_POST:
+      return "module_post";
+    case GLOBAL_PRE:
+      return "global_pre";
+    case GLOBAL_POST:
+      return "global_post";
+    case FUNCTION_PRE:
+      return "function_pre";
+    case FUNCTION_POST:
+      return "function_post";
+    case BASIC_BLOCK_PRE:
+      return "basic_block_pre";
+    case BASIC_BLOCK_POST:
+      return "basic_block_post";
+    case INSTRUCTION_PRE:
+      return "instruction_pre";
+    case INSTRUCTION_POST:
+      return "instruction_post";
+    case SPECIAL_VALUE:
+      return "special_value";
+    }
+    llvm_unreachable("Invalid kind!");
+  }
+  /// Return the kind named by \p S; the inverse of getKindStr(). Any string
+  /// that matches no kind name yields Last, which equals SPECIAL_VALUE, so an
+  /// unknown string cannot be told apart from "special_value" by the result.
+  static KindTy getKindFromStr(StringRef S) {
+    return StringSwitch<KindTy>(S)
+        .Case("module_pre", MODULE_PRE)
+        .Case("module_post", MODULE_POST)
+        .Case("global_pre", GLOBAL_PRE)
+        .Case("global_post", GLOBAL_POST)
+        .Case("function_pre", FUNCTION_PRE)
+        .Case("function_post", FUNCTION_POST)
+        .Case("basic_block_pre", BASIC_BLOCK_PRE)
+        .Case("basic_block_post", BASIC_BLOCK_POST)
+        .Case("instruction_pre", INSTRUCTION_PRE)
+        .Case("instruction_post", INSTRUCTION_POST)
+        .Case("special_value", SPECIAL_VALUE)
+        .Default(Last);
+  }
+
+  /// Return true if \p Kind is one of the *_PRE kinds. All *_POST kinds and
+  /// SPECIAL_VALUE yield false.
+  static bool isPRE(KindTy Kind) {
+    switch (Kind) {
+    case MODULE_PRE:
+    case GLOBAL_PRE:
+    case FUNCTION_PRE:
+    case BASIC_BLOCK_PRE:
+    case INSTRUCTION_PRE:
+      return true;
+    case MODULE_POST:
+    case GLOBAL_POST:
+    case FUNCTION_POST:
+    case BASIC_BLOCK_POST:
+    case INSTRUCTION_POST:
+    case SPECIAL_VALUE:
+      return false;
+    }
+    llvm_unreachable("Invalid kind!");
+  }
+  /// Return true if the kind of this location is one of the *_PRE kinds.
+  bool isPRE() const { return isPRE(Kind); }
+
+  /// Return the instruction opcode. Asserts that this is an INSTRUCTION_PRE
+  /// or INSTRUCTION_POST location.
+  unsigned getOpcode() const {
+    assert((Kind == INSTRUCTION_PRE || Kind == INSTRUCTION_POST) &&
+           "Expected instruction!");
+    return Opcode;
+  }
+
+private:
+  /// The kind of this location; fixed at construction.
+  const KindTy Kind;
+  /// The instruction opcode; only set by the opcode constructor. Otherwise it
+  /// keeps the default, which is -1 converted to unsigned.
+  const unsigned Opcode = -1;
+};
+
+struct BaseConfigurationOpportunity {
----------------
shiltian wrote:

document all these classes please

https://github.com/llvm/llvm-project/pull/138958


More information about the llvm-commits mailing list