[clang] [compiler-rt] [llvm] [TypeProf][InstrFDO]Implement more efficient comparison sequence for indirect-call-promotion with vtable profiles. (PR #81442)
David Li via cfe-commits
cfe-commits at lists.llvm.org
Wed Jun 12 13:55:02 PDT 2024
================
@@ -103,30 +110,220 @@ static cl::opt<bool>
ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
cl::desc("Dump IR after transformation happens"));
+// Intended for LLVM regression tests that exercise the transformation which
+// compares vtables. Hidden option; defaults to false.
+static cl::opt<bool> ICPEnableVTableCmp(
+    "icp-enable-vtable-cmp", cl::Hidden, cl::init(false),
+    cl::desc("If ThinLTO and WPD is enabled and this option is true, "
+             "indirect-call promotion pass will compare vtables rather than "
+             "functions for speculative devirtualization of virtual calls."
+             " If set to false, indirect-call promotion pass will always "
+             "compare functions."));
+
+// Hidden floating-point option, 0.99 by default. Per its description it is the
+// percentage of vtable count to compare.
+// NOTE(review): the code consuming this value is outside this excerpt.
+static cl::opt<float> ICPVTableCountPercentage(
+    "icp-vtable-count-percentage", cl::Hidden, cl::init(0.99),
+    cl::desc("Percentage of vtable count to compare"));
+
+// Hidden integer option, 0 by default. Its description refers to a number of
+// additional instructions allowed for the last candidate.
+// NOTE(review): the code consuming this value is outside this excerpt.
+static cl::opt<int> ICPNumAdditionalVTableLast(
+    "icp-num-additional-vtable-last", cl::Hidden, cl::init(0),
+    cl::desc("The number of additional instruction for the last candidate"));
+
namespace {
+// Two-level map: a vtable global variable maps to an inner map from an `int`
+// to a `Constant *`.
+// NOTE(review): going by the alias name, the int is presumably an address
+// point offset and the constant the matching address point -- confirm against
+// the users of this map.
+using VTableAddressPointOffsetValMap =
+    SmallDenseMap<const GlobalVariable *,
+                  SmallDenseMap<int, Constant *, 4>, 8>;
+
+// Type information collected for a single virtual call site.
+struct VirtualCallSiteInfo {
+ // The offset from the address point to the virtual function in the vtable.
+ uint64_t FunctionOffset;
+ // The instruction that computes the address point of the vtable.
+ Instruction *VPtr;
+ // The compatible type string used in LLVM type intrinsics.
+ StringRef CompatibleTypeStr;
+};
+
+// Maps a virtual call (the key) to its `VirtualCallSiteInfo` (the value).
+using VirtualCallSiteTypeInfoMap =
+ SmallDenseMap<const CallBase *, VirtualCallSiteInfo, 8>;
+
+// Looks through the `!type` metadata nodes attached to `VTableVar` and, for
+// the first node whose operand 1 is an MDString equal to `CompatibleType`,
+// returns operand 0 of that node as a zero-extended integer offset.
+// Returns std::nullopt when no attached node matches.
+static std::optional<uint64_t>
+getCompatibleTypeOffset(const GlobalVariable &VTableVar,
+                        StringRef CompatibleType) {
+  // Type metadata associated with the vtable.
+  SmallVector<MDNode *, 2> TypeNodes;
+  VTableVar.getMetadata(LLVMContext::MD_type, TypeNodes);
+
+  for (MDNode *Node : TypeNodes) {
+    // Skip nodes whose type identifier is not a string or names another type.
+    auto *TypeId = dyn_cast<MDString>(Node->getOperand(1).get());
+    if (!TypeId || TypeId->getString() != CompatibleType)
+      continue;
+
+    auto *OffsetMD = cast<ConstantAsMetadata>(Node->getOperand(0));
+    return cast<ConstantInt>(OffsetMD->getValue())->getZExtValue();
+  }
+
+  return std::nullopt;
+}
+
+// Returns a constant for the address point of `VTable` that lies
+// `AddressPointOffset` bytes past its start, expressed as an inbounds i8 GEP
+// on `VTable` with a 32-bit index. Asserts that the offset is smaller than the
+// alloc size of the vtable's value type.
+static Constant *getVTableAddressPointOffset(GlobalVariable *VTable,
+                                             uint32_t AddressPointOffset) {
+  Module &ParentModule = *VTable->getParent();
+  LLVMContext &Ctx = ParentModule.getContext();
+  assert(AddressPointOffset < ParentModule.getDataLayout().getTypeAllocSize(
+                                  VTable->getValueType()) &&
+         "Out-of-bound access");
+
+  Constant *ByteOffset =
+      llvm::ConstantInt::get(Type::getInt32Ty(Ctx), AddressPointOffset);
+  return ConstantExpr::getInBoundsGetElementPtr(Type::getInt8Ty(Ctx), VTable,
+                                                ByteOffset);
+}
+
+// Returns the basic block in which use `U` is used by `UserInst`: for a PHI
+// node that is the incoming block associated with `U`, otherwise it is the
+// block containing `UserInst`.
+static BasicBlock *getUserBasicBlock(Use &U, Instruction *UserInst) {
+  auto *Phi = dyn_cast<PHINode>(UserInst);
+  return Phi ? Phi->getIncomingBlock(U) : UserInst->getParent();
+}
+
+// Decides whether `Inst` may be sunk from its parent block into `DestBB`.
+// Returns true only when both conditions hold:
+// 1) `Inst->getParent()` is the sole predecessor of `DestBB`, so `DestBB` is
+//    dominated by it and no critical edge has to be crossed.
+// 2) `Inst` has at least one user that is not a debug or pseudo instruction,
+//    and every such user is in `DestBB` (for a PHI user, the incoming block of
+//    the use is what counts; see `getUserBasicBlock`).
+static bool isDestBBSuitableForSink(Instruction *Inst, BasicBlock *DestBB) {
+  BasicBlock *SrcBB = Inst->getParent();
+  assert(SrcBB != DestBB && SrcBB->getTerminator()->getNumSuccessors() == 2 &&
+         "Caller should guarantee");
+  // For simplicity, never sink across a critical edge.
+  if (SrcBB != DestBB->getUniquePredecessor())
+    return false;
+
+  // From here on SrcBB is known to dominate DestBB.
+  bool HasRealUser = false;
+  for (Use &U : Inst->uses()) {
+    // A checked cast suffices: the IR verifier guarantees that the user of an
+    // instruction is an instruction. See `Verifier::visitInstruction`.
+    auto *UserInst = cast<Instruction>(U.getUser());
+    // Debug and pseudo instructions can be sunk together with Inst.
+    if (UserInst->isDebugOrPseudoInst())
+      continue;
+    // Give up as soon as one real user sits outside DestBB.
+    // TODO: Sink to the common dominator of all user blocks.
+    if (getUserBasicBlock(U, UserInst) != DestBB)
+      return false;
+    HasRealUser = true;
+  }
+  return HasRealUser;
+}
+
+// For the virtual call dispatch sequence, try to sink vtable load instructions
+// to the cold indirect call fallback.
+static bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
+ assert(!I->isTerminator());
+ if (!isDestBBSuitableForSink(I, DestBlock))
+ return false;
+
+ assert(DestBlock->getUniquePredecessor() == I->getParent());
+
+ // Do not move control-flow-involving, volatile loads, vaarg, etc.
+ // Do not sink static or dynamic alloca instructions. Static allocas must
+ // remain in the entry block, and dynamic allocas must not be sunk in between
+ // a stacksave / stackrestore pair, which would incorrectly shorten its
+ // lifetime.
+ if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
+ isa<AllocaInst>(I))
+ return false;
+
+ // Do not sink convergent call instructions.
+ if (const auto *C = dyn_cast<CallBase>(I))
----------------
david-xl wrote:
ok
https://github.com/llvm/llvm-project/pull/81442
More information about the cfe-commits
mailing list