[clang] [compiler-rt] [llvm] [TypeProf][InstrFDO]Implement more efficient comparison sequence for indirect-call-promotion with vtable profiles. (PR #81442)

Teresa Johnson via cfe-commits cfe-commits at lists.llvm.org
Mon Jun 24 20:46:09 PDT 2024


================
@@ -103,27 +112,226 @@ static cl::opt<bool>
     ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
                  cl::desc("Dump IR after transformation happens"));
 
+// Indirect call promotion pass will fall back to function-based comparison if
+// vtable-count / function-count is smaller than this threshold.
+static cl::opt<float> ICPVTablePercentageThreshold(
+    "icp-vtable-percentage-threshold", cl::init(0.99), cl::Hidden,
+    cl::desc("The percentage threshold of vtable-count / function-count for "
+             "cost-benefit analysis. "));
+
+// Although comparing vtables can save a vtable load, we may need to compare
+// vtable pointer with multiple vtable address points due to class inheritance.
+// Comparing with multiple vtables inserts additional instructions on hot code
+// path; and doing so for earlier candidate of one icall can affect later
+// function candidate in an undesired way. We allow multiple vtable comparison
+// for the last function candidate and use the option below to cap the number
+// of vtables.
+static cl::opt<int> ICPMaxNumVTableLastCandidate(
+    "icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden,
+    cl::desc("The maximum number of vtable for the last candidate."));
+
 namespace {
 
+// The key is a vtable global variable, and the value is a map.
+// In the inner map, the key represents address point offsets and the value is a
+// constant for this address point.
+using VTableAddressPointOffsetValMap =
+    SmallDenseMap<const GlobalVariable *, std::unordered_map<int, Constant *>>;
+
+// A struct to collect type information for a virtual call site.
+struct VirtualCallSiteInfo {
+  // The offset from the address point to virtual function in the vtable.
+  uint64_t FunctionOffset;
+  // The instruction that computes the address point of vtable.
+  Instruction *VPtr;
+  // The compatible type used in LLVM type intrinsics.
+  StringRef CompatibleTypeStr;
+};
+
+// The key is a virtual call, and value is its type information.
+using VirtualCallSiteTypeInfoMap =
+    SmallDenseMap<const CallBase *, VirtualCallSiteInfo>;
+
+// The key is vtable GUID, and value is its value profile count.
+using VTableGUIDCountsMap = SmallDenseMap<uint64_t, uint64_t, 16>;
+
+// Returns the address point offset of the given compatible type.
+//
+// Type metadata of a vtable specifies the types that can container a pointer to
+// this vtable, for example, `Base*` can be a pointer to an instantiated type
+// but not vice versa. See also https://llvm.org/docs/TypeMetadata.html
+static std::optional<uint64_t>
+getAddressPointOffset(const GlobalVariable &VTableVar,
+                      StringRef CompatibleType) {
+  SmallVector<MDNode *> Types;
+  VTableVar.getMetadata(LLVMContext::MD_type, Types);
+
+  for (MDNode *Type : Types)
+    if (auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get());
+        TypeId && TypeId->getString() == CompatibleType)
+      return cast<ConstantInt>(
+                 cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
+          ->getZExtValue();
+
+  return std::nullopt;
+}
+
+// Returns a constant representing the vtable's address point specified by the
+// offset.
+static Constant *getVTableAddressPointOffset(GlobalVariable *VTable,
+                                             uint32_t AddressPointOffset) {
+  Module &M = *VTable->getParent();
+  LLVMContext &Context = M.getContext();
+  assert(AddressPointOffset <
+             M.getDataLayout().getTypeAllocSize(VTable->getValueType()) &&
+         "Out-of-bound access");
+
+  return ConstantExpr::getInBoundsGetElementPtr(
+      Type::getInt8Ty(Context), VTable,
+      llvm::ConstantInt::get(Type::getInt32Ty(Context), AddressPointOffset));
+}
+
+// Returns the basic block in which `Inst` is used via its `UserInst`.
+static BasicBlock *getUserBasicBlock(Use &U, Instruction *UserInst) {
+  if (PHINode *PN = dyn_cast<PHINode>(UserInst))
+    return PN->getIncomingBlock(U);
+
+  return UserInst->getParent();
+}
+
+// `DestBB` is a suitable basic block to sink `Inst` into when the following
+// conditions are true:
+// 1) `Inst->getParent()` is the sole predecessor of `DestBB`. This way `DestBB`
+//    is dominated by `Inst->getParent()` and we don't need to sink across a
+//    critical edge.
+// 2) `Inst` have users and all users are in `DestBB`.
+static bool isDestBBSuitableForSink(Instruction *Inst, BasicBlock *DestBB) {
+  BasicBlock *BB = Inst->getParent();
+  assert(Inst->getParent() != DestBB &&
+         BB->getTerminator()->getNumSuccessors() == 2 &&
+         "Guaranteed by ICP transformation");
+  // Do not sink across a critical edge for simplicity.
+  if (DestBB->getUniquePredecessor() != BB)
----------------
teresajohnson wrote:

It looks like tryToSinkInstruction is called repeatedly for insts all from the same BB, and for the same DestBB - consider hoisting this check out to the caller of tryToSinkInstruction (i.e. into tryToSinkInstructions).

https://github.com/llvm/llvm-project/pull/81442


More information about the cfe-commits mailing list