[llvm] [FMV][GlobalOpt] Statically resolve calls to versioned functions. (PR #87939)
Alexandros Lamprineas via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 14 03:17:55 PST 2024
================
@@ -2641,6 +2641,150 @@ DeleteDeadIFuncs(Module &M,
return Changed;
}
+// Walks the use-def chain of \p V backwards through select and PHI operands, appending every Function it reaches to \p Versions.
+// Values of any other kind are silently ignored, and a Function reachable via several paths is appended once per path.
+static void collectVersions(Value *V, SmallVectorImpl<Function *> &Versions) {
+ if (auto *F = dyn_cast<Function>(V)) { // Leaf of the walk: a concrete function.
+ Versions.push_back(F);
+ } else if (auto *Sel = dyn_cast<SelectInst>(V)) { // Either arm may be the selected value, so follow both.
+ collectVersions(Sel->getTrueValue(), Versions);
+ collectVersions(Sel->getFalseValue(), Versions);
+ } else if (auto *Phi = dyn_cast<PHINode>(V)) { // NOTE(review): no visited set is kept; a PHI that feeds itself would recurse without bound - confirm this cannot occur here.
+ for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I)
+ collectVersions(Phi->getIncomingValue(I), Versions);
+ }
+}
+
+// Bypass the IFunc Resolver of MultiVersioned functions when possible. To
+// deduce whether the optimization is legal we need to compare the target
+// features between caller and callee versions. The criteria for bypassing
+// the resolver are the following:
+//
+// * If the callee's feature set is a subset of the caller's feature set,
+// then the callee is a candidate for direct call.
+//
+// * Among such candidates the one of highest priority is the best match
+// and it shall be picked, unless there is a version of the callee with
+// higher priority than the best match which cannot be picked from a
+// higher priority caller (directly or through the resolver).
+//
+// * For every higher priority callee version than the best match, there
+// is a higher priority caller version whose feature set availability
+// is implied by the callee's feature set.
+//
+static bool OptimizeNonTrivialIFuncs(
+ Module &M, function_ref<TargetTransformInfo &(Function &)> GetTTI) {
+ bool Changed = false;
+
+ // Cache containing the mask constructed from a function's target features.
+ DenseMap<Function *, uint64_t> FeatureMask;
+
+ for (GlobalIFunc &IF : M.ifuncs()) {
+ if (IF.isInterposable())
+ continue;
+
+ Function *Resolver = IF.getResolverFunction();
+ if (!Resolver)
+ continue;
+
+ if (Resolver->isInterposable())
+ continue;
+
+ // Discover the callee versions.
+ SmallVector<Function *> Callees;
+ for (BasicBlock &BB : *Resolver)
+ if (auto *Ret = dyn_cast_or_null<ReturnInst>(BB.getTerminator()))
+ collectVersions(Ret->getReturnValue(), Callees);
+
+ if (Callees.empty())
+ continue;
+
+ TargetTransformInfo &TTI = GetTTI(*Resolver);
+
+ // Cache the feature mask for each callee.
+ bool IsFMV = true;
+ for (Function *Callee : Callees) {
+ auto [It, Inserted] = FeatureMask.try_emplace(Callee);
+ if (Inserted) {
+ It->second = TTI.getFeatureMask(*Callee);
+ // Empty mask means this isn't an FMV callee.
+ if (It->second == 0) {
+ IsFMV = false;
+ break;
+ }
+ }
+ }
+
+ // This IFunc is not FMV.
+ if (!IsFMV)
+ continue;
+
+ // Sort the callee versions in decreasing priority order.
+ sort(Callees, [&](auto *LHS, auto *RHS) {
+ return FeatureMask[LHS] > FeatureMask[RHS];
----------------
labrinea wrote:
I may be mistaken here. The order of FEAT_* in the CPUFeatures enum is not always aligned with the priority order. We may need another way to compare.
https://github.com/llvm/llvm-project/pull/87939
More information about the llvm-commits
mailing list