[clang] [compiler-rt] [llvm] [TypeProf][InstrFDO]Implement more efficient comparison sequence for indirect-call-promotion with vtable profiles. (PR #81442)

Mingming Liu via cfe-commits cfe-commits at lists.llvm.org
Thu Jun 6 23:15:42 PDT 2024


================
@@ -103,30 +110,220 @@ static cl::opt<bool>
     ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
                  cl::desc("Dump IR after transformation happens"));
 
+// This option is meant to be used by LLVM regression test and test the
+// transformation that compares vtables.
+static cl::opt<bool> ICPEnableVTableCmp(
+    "icp-enable-vtable-cmp", cl::init(false), cl::Hidden,
+    cl::desc("If ThinLTO and WPD is enabled and this option is true, "
+             "indirect-call promotion pass will compare vtables rather than "
+             "functions for speculative devirtualization of virtual calls."
+             " If set to false, indirect-call promotion pass will always "
+             "compare functions."));
+
+// Profitability threshold: the fraction of the call site's vtable counts
+// that the promoted candidates should cover. NOTE(review): exact use of the
+// 0.99 default is determined by the callers of this option — confirm there.
+static cl::opt<float>
+    ICPVTableCountPercentage("icp-vtable-count-percentage", cl::init(0.99),
+                             cl::Hidden,
+                             cl::desc("Percentage of vtable count to compare"));
+
+// Cost knob: extra instruction budget allowed for the last candidate when
+// comparing vtables (default 0, i.e. no extra instructions).
+static cl::opt<int> ICPNumAdditionalVTableLast(
+    "icp-num-additional-vtable-last", cl::init(0), cl::Hidden,
+    cl::desc("The number of additional instruction for the last candidate"))&#59;
+
 namespace {
 
+// Maps a vtable global variable to constants computed for it, keyed by the
+// byte offset (of an address point) into the vtable. Presumably used as a
+// cache so each (vtable, offset) constant is materialized once.
+using VTableAddressPointOffsetValMap =
+    SmallDenseMap<const GlobalVariable *, SmallDenseMap<int, Constant *, 4>, 8>;
+
+// A struct to collect type information for a virtual call site.
+struct VirtualCallSiteInfo {
+  // The offset from the address point to virtual function in the vtable.
+  uint64_t FunctionOffset;
+  // The instruction that computes the address point of vtable.
+  Instruction *VPtr;
+  // The compatible type used in LLVM type intrinsics.
+  StringRef CompatibleTypeStr;
+};
+
+// The key is a virtual call, and value is its type information.
+using VirtualCallSiteTypeInfoMap =
+    SmallDenseMap<const CallBase *, VirtualCallSiteInfo, 8>;
+
+// Find the offset where type string is `CompatibleType`.
+// Scans the !type metadata nodes attached to `VTableVar`; each node carries
+// (operand 0) a constant byte offset and (operand 1) a type identifier
+// string. Returns the offset of the entry whose identifier equals
+// `CompatibleType`, or std::nullopt if no entry matches.
+static std::optional<uint64_t>
+getCompatibleTypeOffset(const GlobalVariable &VTableVar,
+                        StringRef CompatibleType) {
+  SmallVector<MDNode *, 2> Types; // type metadata associated with a vtable.
+  VTableVar.getMetadata(LLVMContext::MD_type, Types);
+
+  for (MDNode *Type : Types)
+    // Operand 1 is the type identifier string.
+    if (auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get());
+        TypeId && TypeId->getString() == CompatibleType)
+
+      // Operand 0 is the byte offset, wrapped as constant metadata.
+      return cast<ConstantInt>(
+                 cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
+          ->getZExtValue();
+
+  return std::nullopt;
+}
+
+// Returns a constant representing the vtable's address point specified by the
+// offset.
+static Constant *getVTableAddressPointOffset(GlobalVariable *VTable,
+                                             uint32_t AddressPointOffset) {
+  Module &M = *VTable->getParent();
+  LLVMContext &Context = M.getContext();
+  // The offset must lie within the vtable's allocated storage.
+  assert(AddressPointOffset <
+             M.getDataLayout().getTypeAllocSize(VTable->getValueType()) &&
+         "Out-of-bound access");
+
+  // Byte-addressed (i8) inbounds GEP from the start of the vtable global.
+  return ConstantExpr::getInBoundsGetElementPtr(
+      Type::getInt8Ty(Context), VTable,
+      llvm::ConstantInt::get(Type::getInt32Ty(Context), AddressPointOffset));
+}
+
+// Returns the basic block in which `UserInst` uses the value via use `U`.
+// For a PHI node user this is the incoming block associated with the use;
+// for every other user it is simply the user's parent block.
+static BasicBlock *getUserBasicBlock(Use &U, Instruction *UserInst) {
+  if (PHINode *PN = dyn_cast<PHINode>(UserInst))
+    return PN->getIncomingBlock(U);
+
+  return UserInst->getParent();
+}
+
+// `DestBB` is a suitable basic block to sink `Inst` into when the following
+// conditions are true:
+// 1) `Inst->getParent()` is the sole predecessor of `DestBB`. This way `DestBB`
+//    is dominated by `Inst->getParent()` and we don't need to sink across a
+//    critical edge.
+// 2) `Inst` has at least one non-debug user and all such users are in
+//    `DestBB` (debug/pseudo users are ignored and may be sunk along).
+static bool isDestBBSuitableForSink(Instruction *Inst, BasicBlock *DestBB) {
+  BasicBlock *BB = Inst->getParent();
+  assert(Inst->getParent() != DestBB &&
+         BB->getTerminator()->getNumSuccessors() == 2 &&
+         "Caller should guarantee");
+  // Do not sink across a critical edge for simplicity.
+  if (DestBB->getUniquePredecessor() != BB)
+    return false;
+
+  // Now we know BB dominates DestBB.
+  BasicBlock *UserBB = nullptr;
+  for (Use &Use : Inst->uses()) {
+    User *User = Use.getUser();
+    // Do checked cast since IR verifier guarantees that the user of an
+    // instruction must be an instruction. See `Verifier::visitInstruction`.
+    Instruction *UserInst = cast<Instruction>(User);
+    // We can sink debug or pseudo instructions together with Inst.
+    if (UserInst->isDebugOrPseudoInst())
+      continue;
+    UserBB = getUserBasicBlock(Use, UserInst);
+    // Do not sink if Inst is used in a basic block that is not DestBB.
+    // TODO: Sink to the common dominator of all user blocks.
+    if (UserBB != DestBB)
+      return false;
+  }
+  // UserBB stays null when Inst has no non-debug users at all; in that case
+  // there is nothing that justifies the sink, so report unsuitable.
+  return UserBB != nullptr;
+}
+
+// For the virtual call dispatch sequence, try to sink vtable load instructions
+// to the cold indirect call fallback.
+// Returns true if `I` was actually moved into `DestBlock`, false if any of
+// the legality checks below rejected the sink.
+static bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
+  assert(!I->isTerminator());
+  if (!isDestBBSuitableForSink(I, DestBlock))
+    return false;
+
+  // Guaranteed by isDestBBSuitableForSink above.
+  assert(DestBlock->getUniquePredecessor() == I->getParent());
+
+  // Do not move control-flow-involving, volatile loads, vaarg, etc.
+  // Do not sink static or dynamic alloca instructions. Static allocas must
+  // remain in the entry block, and dynamic allocas must not be sunk in between
+  // a stacksave / stackrestore pair, which would incorrectly shorten its
+  // lifetime.
+  if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
+      isa<AllocaInst>(I))
+    return false;
+
+  // Do not sink convergent call instructions.
+  if (const auto *C = dyn_cast<CallBase>(I))
+    if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
+      return false;
+
+  // Do not move an instruction that may write to memory.
+  if (I->mayWriteToMemory())
+    return false;
+
+  // We can only sink load instructions if there is nothing between the load and
+  // the end of block that could change the value.
+  if (I->mayReadFromMemory()) {
+    // We know that SrcBlock is the unique predecessor of DestBlock.
+    for (BasicBlock::iterator Scan = std::next(I->getIterator()),
+                              E = I->getParent()->end();
+         Scan != E; ++Scan)
+      if (Scan->mayWriteToMemory())
+        return false;
+  }
+
+  // All checks passed; move I to the start of DestBlock (after any PHIs).
+  BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
+  I->moveBefore(*DestBlock, InsertPos);
+
+  // TODO: Sink debug intrinsic users of I to 'DestBlock'.
+  // 'InstCombinerImpl::tryToSinkInstructionDbgValues' and
+  // 'InstCombinerImpl::tryToSinkInstructionDbgVariableRecords' already have
+  // the core logic to do this.
+  return true;
+}
+
+// Try to sink instructions after VPtr to the indirect call fallback.
+// Returns the number of sunk IR instructions.
+static int tryToSinkInstructions(Instruction *VPtr,
+                                 BasicBlock *IndirectCallBB) {
+  BasicBlock *OriginalBB = VPtr->getParent();
+
+  int SinkCount = 0;
+  // Walk the block bottom-up (reverse), skipping the terminator via
+  // drop_begin; early_inc_range keeps iteration valid while instructions
+  // are moved out of the block.
+  // FIXME: Find a way to bail out of the loop.
+  for (Instruction &I :
+       llvm::make_early_inc_range(llvm::drop_begin(llvm::reverse(*OriginalBB))))
+    if (tryToSinkInstruction(&I, IndirectCallBB))
+      SinkCount++;
+
+  return SinkCount;
+}
+
 // Promote indirect calls to conditional direct calls, keeping track of
 // thresholds.
 class IndirectCallPromoter {
 private:
   Function &F;
+  Module &M;
+
+  ProfileSummaryInfo *PSI = nullptr;
 
   // Symtab that maps indirect call profile values to function names and
   // defines.
   InstrProfSymtab *const Symtab;
 
   const bool SamplePGO;
 
+  // A map from a virtual call to its type information.
+  const VirtualCallSiteTypeInfoMap &VirtualCSInfo;
+
+  VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal;
+
   OptimizationRemarkEmitter &ORE;
 
   // A struct that records the direct target and it's call count.
   struct PromotionCandidate {
     Function *const TargetFunction;
     const uint64_t Count;
 
+    // The byte offset of TargetFunction starting from the vtable address point.
+    uint64_t FunctionOffset;
+    SmallVector<std::pair<uint64_t, uint64_t>, 2> VTableGUIDAndCounts;
----------------
minglotus-6 wrote:

Done. I also made two other changes to the struct fields:
1. Remove `FunctionOffset` field from struct `PromotionCandidate`. The field is not used now.
2. Use `SmallDenseMap<uint64_t, uint64_t>` to store per vtable-guid counters (not `SmallVector<pair<uint64_t, uint64_t>>`).

https://github.com/llvm/llvm-project/pull/81442


More information about the cfe-commits mailing list