[llvm] [MemProf] Support cloning for indirect calls with ThinLTO (PR #110625)

Snehasish Kumar via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 4 10:47:54 PDT 2024


================
@@ -4075,24 +4367,80 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
                      << ore::NV("Attribute", AllocTypeString));
           }
         } else if (!CallsiteContext.empty()) {
-          // Consult the next callsite node.
-          assert(SI != FS->callsites().end());
-          auto &StackNode = *(SI++);
-
+          if (!CalledFunction) {
+            // This is an indirect call, see if we have profile information and
+            // whether any clones were recorded for the profiled targets (that
+            // we synthesized CallsiteInfo summary records for when building the
+            // index).
 #ifndef NDEBUG
-          // Sanity check that the stack ids match between the summary and
-          // instruction metadata.
-          auto StackIdIndexIter = StackNode.StackIdIndices.begin();
-          for (auto StackId : CallsiteContext) {
-            assert(StackIdIndexIter != StackNode.StackIdIndices.end());
-            assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
-                   StackId);
-            StackIdIndexIter++;
+            // We should have skipped inline assembly calls.
+            auto *CI = dyn_cast<CallInst>(CB);
+            assert(!CI || !CI->isInlineAsm());
+#endif
+            // We should have skipped direct calls via a Constant.
+            assert(CalledValue && !isa<Constant>(CalledValue));
+
+            uint32_t NumCandidates;
+            uint64_t TotalCount;
+            auto CandidateProfileData =
+                ICallAnalysis.getPromotionCandidatesForInstruction(
+                    CB, TotalCount, NumCandidates);
+            if (!CandidateProfileData.empty()) {
+              unsigned CallsiteInfoStartStartIndex =
+                  static_cast<unsigned int>(SI - FS->callsites().begin());
+              // Iterate past all of the associated callsites nodes and check
+              // them.
+              for (const auto &Candidate : CandidateProfileData) {
+#ifndef NDEBUG
+                auto CalleeValueInfo =
+#endif
+                    ImportSummary->getValueInfo(Candidate.Value);
+                // We might not have a ValueInfo if this is a distributed
+                // ThinLTO backend and decided not to import that function.
+                assert(!CalleeValueInfo || SI->Callee == CalleeValueInfo);
+                assert(SI != FS->callsites().end());
+                auto &StackNode = *(SI++);
+                // See if any of the clones of the indirect callsite for this
+                // profiled target should call a cloned version of the profiled
+                // target. We only need to do the ICP here if so.
+                for (auto CloneNo : StackNode.Clones) {
+                  if (!CloneNo)
+                    continue;
+                  // Save information for ICP, which is performed later to avoid
+                  // messing up the current function traversal.
+                  ICallAnalysisMap[CB] = {CandidateProfileData.vec(),
+                                          NumCandidates, TotalCount,
+                                          CallsiteInfoStartStartIndex};
+                  break;
+                }
+                // Perform cloning if not yet done. This is done here in case
+                // we don't need to do ICP, but might need to clone this
+                // function as it is the target of other cloned calls.
+                CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size());
+              }
+            }
           }
+
+          else {
+            // Consult the next callsite node.
+            assert(SI != FS->callsites().end());
+            auto &StackNode = *(SI++);
+
+#ifndef NDEBUG
+            // Sanity check that the stack ids match between the summary and
+            // instruction metadata.
+            auto StackIdIndexIter = StackNode.StackIdIndices.begin();
+            for (auto StackId : CallsiteContext) {
+              assert(StackIdIndexIter != StackNode.StackIdIndices.end());
+              assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
+                     StackId);
+              StackIdIndexIter++;
+            }
 #endif
 
-          CloneCallsite(StackNode, CB, CalledFunction);
-        } else if (CB->isTailCall()) {
+            CloneCallsite(StackNode, CB, CalledFunction);
+          }
+        } else if (CB->isTailCall() && CalledFunction) {
----------------
snehasish wrote:

Is it necessary to check CalledFunction here? I believe a tail call should always be direct, and thus we should always have CalledFunction.

Maybe an assert would be more appropriate, if you agree.

https://github.com/llvm/llvm-project/pull/110625


More information about the llvm-commits mailing list