[llvm-branch-commits] [llvm] [Attributor][AMDGPU] Improve the handling of indirect calls (PR #100954)

Shilei Tian via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Sun Jul 28 16:29:55 PDT 2024


https://github.com/shiltian created https://github.com/llvm/llvm-project/pull/100954

None

>From 26e3c81b1488d32620f840d741966648e6d6c884 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Sun, 28 Jul 2024 19:24:31 -0400
Subject: [PATCH] [Attributor][AMDGPU] Improve the handling of indirect calls

---
 llvm/include/llvm/Transforms/IPO/Attributor.h  |  9 +++++----
 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp    | 18 ++++++++++++++++++
 llvm/lib/Transforms/IPO/Attributor.cpp         |  2 +-
 .../Transforms/IPO/AttributorAttributes.cpp    |  3 ++-
 .../AMDGPU/amdgpu-attributor-no-agpr.ll        | 16 +++-------------
 5 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 34557238ecb23..596ee39c35a37 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -1448,7 +1448,7 @@ struct AttributorConfig {
   /// Callback function to determine if an indirect call targets should be made
   /// direct call targets (with an if-cascade).
   std::function<bool(Attributor &A, const AbstractAttribute &AA, CallBase &CB,
-                     Function &AssummedCallee)>
+                     Function &AssummedCallee, bool IsSingleton)>
       IndirectCalleeSpecializationCallback = nullptr;
 
   /// Helper to update an underlying call graph and to delete functions.
@@ -1718,10 +1718,11 @@ struct Attributor {
   /// Return true if we should specialize the call site \b CB for the potential
   /// callee \p Fn.
   bool shouldSpecializeCallSiteForCallee(const AbstractAttribute &AA,
-                                         CallBase &CB, Function &Callee) {
+                                         CallBase &CB, Function &Callee,
+                                         bool IsSingleton) {
     return Configuration.IndirectCalleeSpecializationCallback
-               ? Configuration.IndirectCalleeSpecializationCallback(*this, AA,
-                                                                    CB, Callee)
+               ? Configuration.IndirectCalleeSpecializationCallback(
+                     *this, AA, CB, Callee, IsSingleton)
                : true;
   }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index ab98da31b050f..b8ab11a7b420b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -14,6 +14,7 @@
 #include "GCNSubtarget.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/Analysis/CycleAnalysis.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/IntrinsicsR600.h"
@@ -1041,11 +1042,28 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
        &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
        &AAUnderlyingObjects::ID, &AAIndirectCallInfo::ID});
 
+  /// Helper to decide if we should specialize the indirect \p CB for \p Callee.
+  /// \p IsSingleton indicates whether the \p Callee is the only assumed callee.
+  auto IndirectCalleeSpecializationCallback =
+      [&](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
+          Function &Callee, bool IsSingleton) {
+        if (AMDGPU::isEntryFunctionCC(Callee.getCallingConv()))
+          return false;
+        // Singleton functions should be specialized.
+        if (IsSingleton)
+          return true;
+        // Otherwise specialize uniform values.
+        const auto &TTI = TM.getTargetTransformInfo(*CB.getCaller());
+        return TTI.isAlwaysUniform(CB.getCalledOperand());
+      };
+
   AttributorConfig AC(CGUpdater);
   AC.IsClosedWorldModule = HasWholeProgramVisibility;
   AC.Allowed = &Allowed;
   AC.IsModulePass = true;
   AC.DefaultInitializeLiveInternals = false;
+  AC.IndirectCalleeSpecializationCallback =
+      IndirectCalleeSpecializationCallback;
   AC.IPOAmendableCB = [](const Function &F) {
     return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
   };
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 910c0aeacc42e..879a26bcf328d 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -3836,7 +3836,7 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache,
   if (MaxSpecializationPerCB.getNumOccurrences()) {
     AC.IndirectCalleeSpecializationCallback =
         [&](Attributor &, const AbstractAttribute &AA, CallBase &CB,
-            Function &Callee) {
+            Function &Callee, bool IsSingleton) {
           if (MaxSpecializationPerCB == 0)
             return false;
           auto &Set = IndirectCalleeTrackingMap[&CB];
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 2816a85743faa..3f02ea1cbd6cb 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12347,7 +12347,8 @@ struct AAIndirectCallInfoCallSite : public AAIndirectCallInfo {
     SmallVector<Function *, 8> SkippedAssumedCallees;
     SmallVector<std::pair<CallInst *, Instruction *>> NewCalls;
     for (Function *NewCallee : AssumedCallees) {
-      if (!A.shouldSpecializeCallSiteForCallee(*this, *CB, *NewCallee)) {
+      if (!A.shouldSpecializeCallSiteForCallee(*this, *CB, *NewCallee,
+                                               AssumedCallees.size() == 1)) {
         SkippedAssumedCallees.push_back(NewCallee);
         SpecializedForAllCallees = false;
         continue;
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
index e5d440b96349f..d89dae1933365 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
@@ -231,19 +231,7 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
 ; CHECK-LABEL: define amdgpu_kernel void @indirect_calls_none_agpr(
 ; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty
-; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]]
-; CHECK:       2:
-; CHECK-NEXT:    call void @also_empty()
-; CHECK-NEXT:    br label [[TMP6:%.*]]
-; CHECK:       3:
-; CHECK-NEXT:    br i1 true, label [[TMP4:%.*]], label [[TMP5:%.*]]
-; CHECK:       4:
-; CHECK-NEXT:    call void @empty()
-; CHECK-NEXT:    br label [[TMP6]]
-; CHECK:       5:
-; CHECK-NEXT:    unreachable
-; CHECK:       6:
+; CHECK-NEXT:    call void [[FPTR]](), !callees [[META0:![0-9]+]]
 ; CHECK-NEXT:    ret void
 ;
   %fptr = select i1 %cond, ptr @empty, ptr @also_empty
@@ -265,3 +253,5 @@ attributes #0 = { "amdgpu-no-agpr" }
 ; CHECK: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
 ; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-agpr" }
 ;.
+; CHECK: [[META0]] = !{ptr @also_empty, ptr @empty}
+;.



More information about the llvm-branch-commits mailing list