[llvm] [FMV][GlobalOpt] Statically resolve calls to versioned functions. (PR #87939)

Alexandros Lamprineas via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 14 03:17:55 PST 2024


================
@@ -2641,6 +2641,150 @@ DeleteDeadIFuncs(Module &M,
   return Changed;
 }
 
+// Follows the use-def chain of \p V backwards until it finds a Function,
+// in which case it collects in \p Versions.
+static void collectVersions(Value *V, SmallVectorImpl<Function *> &Versions) {
+  if (auto *F = dyn_cast<Function>(V)) {
+    Versions.push_back(F);
+  } else if (auto *Sel = dyn_cast<SelectInst>(V)) {
+    collectVersions(Sel->getTrueValue(), Versions);
+    collectVersions(Sel->getFalseValue(), Versions);
+  } else if (auto *Phi = dyn_cast<PHINode>(V)) {
+    for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I)
+      collectVersions(Phi->getIncomingValue(I), Versions);
+  }
+}
+
+// Bypass the IFunc Resolver of MultiVersioned functions when possible. To
+// deduce whether the optimization is legal we need to compare the target
+// features between caller and callee versions. The criteria for bypassing
+// the resolver are the following:
+//
+// * If the callee's feature set is a subset of the caller's feature set,
+//   then the callee is a candidate for direct call.
+//
+// * Among such candidates the one of highest priority is the best match
+//   and it shall be picked, unless there is a version of the callee with
+//   higher priority than the best match which cannot be picked from a
+//   higher priority caller (directly or through the resolver).
+//
+// * For every higher priority callee version than the best match, there
+//   is a higher priority caller version whose feature set availability
+//   is implied by the callee's feature set.
+//
+static bool OptimizeNonTrivialIFuncs(
+    Module &M, function_ref<TargetTransformInfo &(Function &)> GetTTI) {
+  bool Changed = false;
+
+  // Cache containing the mask constructed from a function's target features.
+  DenseMap<Function *, uint64_t> FeatureMask;
+
+  for (GlobalIFunc &IF : M.ifuncs()) {
+    if (IF.isInterposable())
+      continue;
+
+    Function *Resolver = IF.getResolverFunction();
+    if (!Resolver)
+      continue;
+
+    if (Resolver->isInterposable())
+      continue;
+
+    // Discover the callee versions.
+    SmallVector<Function *> Callees;
+    for (BasicBlock &BB : *Resolver)
+      if (auto *Ret = dyn_cast_or_null<ReturnInst>(BB.getTerminator()))
+        collectVersions(Ret->getReturnValue(), Callees);
+
+    if (Callees.empty())
+      continue;
+
+    TargetTransformInfo &TTI = GetTTI(*Resolver);
+
+    // Cache the feature mask for each callee.
+    bool IsFMV = true;
+    for (Function *Callee : Callees) {
+      auto [It, Inserted] = FeatureMask.try_emplace(Callee);
+      if (Inserted) {
+        It->second = TTI.getFeatureMask(*Callee);
+        // Empty mask means this isn't an FMV callee.
+        if (It->second == 0) {
+          IsFMV = false;
+          break;
+        }
+      }
+    }
+
+    // This IFunc is not FMV.
+    if (!IsFMV)
+      continue;
+
+    // Sort the callee versions in decreasing priority order.
+    sort(Callees, [&](auto *LHS, auto *RHS) {
+      return FeatureMask[LHS] > FeatureMask[RHS];
----------------
labrinea wrote:

I may be mistaken here.The order of FEAT_* in the CPUFeatures enum is not always aligned with the priority order. We may need another way to compare.

https://github.com/llvm/llvm-project/pull/87939


More information about the llvm-commits mailing list