[llvm] 1ca9fe6 - Reapply "[Attributor][AMDGPU] Enable AAIndirectCallInfo for AMDAttributor (#100952)"
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 14 14:16:55 PDT 2024
Author: Shilei Tian
Date: 2024-08-14T17:16:47-04:00
New Revision: 1ca9fe6db3345556c5c6853b3aba8ff209e572df
URL: https://github.com/llvm/llvm-project/commit/1ca9fe6db3345556c5c6853b3aba8ff209e572df
DIFF: https://github.com/llvm/llvm-project/commit/1ca9fe6db3345556c5c6853b3aba8ff209e572df.diff
LOG: Reapply "[Attributor][AMDGPU] Enable AAIndirectCallInfo for AMDAttributor (#100952)"
This reverts commit 36467bfe89f231458eafda3edb916c028f1f0619.
Added:
Modified:
llvm/include/llvm/Transforms/IPO/Attributor.h
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
llvm/lib/Transforms/IPO/Attributor.cpp
llvm/lib/Transforms/IPO/AttributorAttributes.cpp
llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll
llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index ad3c6426efd2fe..718cf704cbdf1a 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -1448,7 +1448,7 @@ struct AttributorConfig {
/// Callback function to determine if an indirect call targets should be made
/// direct call targets (with an if-cascade).
std::function<bool(Attributor &A, const AbstractAttribute &AA, CallBase &CB,
- Function &AssummedCallee)>
+ Function &AssumedCallee, unsigned NumAssumedCallees)>
IndirectCalleeSpecializationCallback = nullptr;
/// Helper to update an underlying call graph and to delete functions.
@@ -1718,10 +1718,11 @@ struct Attributor {
/// Return true if we should specialize the call site \b CB for the potential
/// callee \p Fn.
bool shouldSpecializeCallSiteForCallee(const AbstractAttribute &AA,
- CallBase &CB, Function &Callee) {
+ CallBase &CB, Function &Callee,
+ unsigned NumAssumedCallees) {
return Configuration.IndirectCalleeSpecializationCallback
- ? Configuration.IndirectCalleeSpecializationCallback(*this, AA,
- CB, Callee)
+ ? Configuration.IndirectCalleeSpecializationCallback(
+ *this, AA, CB, Callee, NumAssumedCallees)
: true;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 6cee8979596388..d65e0ae92308e6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -14,6 +14,7 @@
#include "GCNSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/CycleAnalysis.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
@@ -1039,13 +1040,26 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
&AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
&AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID,
&AAPointerInfo::ID, &AAPotentialConstantValues::ID,
- &AAUnderlyingObjects::ID, &AAAddressSpace::ID});
+ &AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
+ &AAInstanceInfo::ID});
AttributorConfig AC(CGUpdater);
AC.IsClosedWorldModule = Options.IsClosedWorld;
AC.Allowed = &Allowed;
AC.IsModulePass = true;
AC.DefaultInitializeLiveInternals = false;
+ AC.IndirectCalleeSpecializationCallback =
+ [&TM](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
+ Function &Callee, unsigned NumAssumedCallees) {
+ if (AMDGPU::isEntryFunctionCC(Callee.getCallingConv()))
+ return false;
+ // Singleton functions can be specialized.
+ if (NumAssumedCallees == 1)
+ return true;
+ // Otherwise specialize uniform values.
+ const auto &TTI = TM.getTargetTransformInfo(*CB.getCaller());
+ return TTI.isAlwaysUniform(CB.getCalledOperand());
+ };
AC.IPOAmendableCB = [](const Function &F) {
return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
};
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 910c0aeacc42e0..38b61b6a88357c 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -3836,7 +3836,7 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache,
if (MaxSpecializationPerCB.getNumOccurrences()) {
AC.IndirectCalleeSpecializationCallback =
[&](Attributor &, const AbstractAttribute &AA, CallBase &CB,
- Function &Callee) {
+ Function &Callee, unsigned) {
if (MaxSpecializationPerCB == 0)
return false;
auto &Set = IndirectCalleeTrackingMap[&CB];
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 8ece5bbdfc77e1..41a7fc0870cf0a 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12357,7 +12357,8 @@ struct AAIndirectCallInfoCallSite : public AAIndirectCallInfo {
SmallVector<Function *, 8> SkippedAssumedCallees;
SmallVector<std::pair<CallInst *, Instruction *>> NewCalls;
for (Function *NewCallee : AssumedCallees) {
- if (!A.shouldSpecializeCallSiteForCallee(*this, *CB, *NewCallee)) {
+ if (!A.shouldSpecializeCallSiteForCallee(*this, *CB, *NewCallee,
+ AssumedCallees.size())) {
SkippedAssumedCallees.push_back(NewCallee);
SpecializedForAllCallees = false;
continue;
diff --git a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
index 386f9cd3f9ce73..aa182b720c6042 100644
--- a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
@@ -15,7 +15,7 @@ define internal void @direct() {
; CHECK-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
; CHECK-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
; CHECK-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
-; CHECK-NEXT: call void [[FP]]()
+; CHECK-NEXT: call void @indirect()
; CHECK-NEXT: ret void
;
%fptr = alloca ptr, addrspace(5)
@@ -36,5 +36,5 @@ define amdgpu_kernel void @test_direct_indirect_call() {
}
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
index 05558c555c581e..848019c8729251 100644
--- a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
+++ b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
@@ -27,7 +27,7 @@ define amdgpu_kernel void @test_simple_indirect_call() #0 {
; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
-; ATTRIBUTOR_GCN-NEXT: call void [[FP]]()
+; ATTRIBUTOR_GCN-NEXT: call void @indirect()
; ATTRIBUTOR_GCN-NEXT: ret void
;
%fptr = alloca ptr, addrspace(5)
@@ -43,5 +43,5 @@ attributes #0 = { "amdgpu-no-dispatch-id" }
; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-no-dispatch-id" "amdgpu-stack-objects" }
;.
; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll
index c2b43f9e000585..850446c414049d 100644
--- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll
+++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll
@@ -32,14 +32,22 @@ entry:
}
define amdgpu_kernel void @foo(ptr noundef %fp) {
-; CHECK-LABEL: define {{[^@]+}}@foo
-; CHECK-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; CHECK-NEXT: store ptr [[FP]], ptr addrspace(5) [[FP_ADDR]], align 8
-; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(5) [[FP_ADDR]], align 8
-; CHECK-NEXT: call void [[LOAD]]()
-; CHECK-NEXT: ret void
+; OW-LABEL: define {{[^@]+}}@foo
+; OW-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR1:[0-9]+]] {
+; OW-NEXT: entry:
+; OW-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; OW-NEXT: store ptr [[FP]], ptr addrspace(5) [[FP_ADDR]], align 8
+; OW-NEXT: call void [[FP]]()
+; OW-NEXT: ret void
+;
+; CW-LABEL: define {{[^@]+}}@foo
+; CW-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR1:[0-9]+]] {
+; CW-NEXT: entry:
+; CW-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; CW-NEXT: store ptr [[FP]], ptr addrspace(5) [[FP_ADDR]], align 8
+; CW-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(5) [[FP_ADDR]], align 8
+; CW-NEXT: call void @bar()
+; CW-NEXT: ret void
;
entry:
%fp.addr = alloca ptr, addrspace(5)
@@ -50,9 +58,9 @@ entry:
}
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
+; OW: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; OW: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
+;.
+; CW: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CW: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CW: {{.*}}
-; OW: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
index 3a6b0485d24174..cca7b49996ff3b 100644
--- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
@@ -36,7 +36,7 @@ define amdgpu_kernel void @test_simple_indirect_call() {
; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
-; ATTRIBUTOR_GCN-NEXT: call void [[FP]]()
+; ATTRIBUTOR_GCN-NEXT: call void @indirect()
; ATTRIBUTOR_GCN-NEXT: ret void
;
; GFX9-LABEL: test_simple_indirect_call:
@@ -81,7 +81,7 @@ define amdgpu_kernel void @test_simple_indirect_call() {
; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" }
;.
; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
+; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
; AKF_GCN: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
;.
More information about the llvm-commits
mailing list