[llvm] [AMDGPU] Infer amdgpu-no-flat-scratch-init attribute in AMDGPUAttributor (PR #94647)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 6 10:54:54 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-llvm-globalisel
Author: Jun Wang (jwanggit86)
<details>
<summary>Changes</summary>
The AMDGPUAnnotateKernelFeatures pass infers the "amdgpu-calls" and "amdgpu-stack-objects" attributes, which are used to infer whether we need to initialize flat scratch. This is, however, not precise. Instead, we should use AMDGPUAttributor and infer amdgpu-no-flat-scratch-init on kernels. Refer to https://github.com/llvm/llvm-project/issues/63586 .
---
Patch is 1.65 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/94647.diff
56 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUAttributes.def (+1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp (+43)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (+3-7)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll (+14-4)
- (modified) llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll (+11-10)
- (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll (+58-54)
- (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll (+25-23)
- (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll (+9-9)
- (added) llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-globalisel.ll (+1028)
- (added) llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll (+914)
- (modified) llvm/test/CodeGen/AMDGPU/attributor-noopt.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll (+18-18)
- (modified) llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/flat-address-space.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll (+15-4)
- (modified) llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll (+15-15)
- (modified) llvm/test/CodeGen/AMDGPU/ipra.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll (+24-72)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.kernel.id.ll (+7-10)
- (modified) llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll (+3-12)
- (modified) llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll (+3-12)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll (+1380)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll (+75)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll (+1380)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll (+1380)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll (+66)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll (+1365)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll (+1320)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll (+273)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll (+15)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll (+276)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll (+261)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll (+18)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll (+276)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll (+276)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll (+9)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll (+6)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll (+34-25)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll (+18-12)
- (modified) llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll (+9-9)
- (modified) llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll (+22-22)
- (modified) llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll (+1-1)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def b/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
index bacc8e4e821e5..8c1c8219690ba 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
@@ -30,5 +30,6 @@ AMDGPU_ATTRIBUTE(WORKITEM_ID_Z, "amdgpu-no-workitem-id-z")
AMDGPU_ATTRIBUTE(LDS_KERNEL_ID, "amdgpu-no-lds-kernel-id")
AMDGPU_ATTRIBUTE(DEFAULT_QUEUE, "amdgpu-no-default-queue")
AMDGPU_ATTRIBUTE(COMPLETION_ACTION, "amdgpu-no-completion-action")
+AMDGPU_ATTRIBUTE(FLAT_SCRATCH_INIT, "amdgpu-no-flat-scratch-init")
#undef AMDGPU_ATTRIBUTE
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 43bfd0f13f875..8bdc9eab577a9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -433,6 +433,19 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
indicatePessimisticFixpoint();
return;
}
+
+ bool HasAllocaOrASCast = false;
+ for (BasicBlock &BB : *F) {
+ for (Instruction &I : BB) {
+ if (isa<AllocaInst>(I) || isa<AddrSpaceCastInst>(I)) {
+ HasAllocaOrASCast = true;
+ removeAssumedBits(FLAT_SCRATCH_INIT);
+ break;
+ }
+ }
+ if (HasAllocaOrASCast)
+ break;
+ }
}
ChangeStatus updateImpl(Attributor &A) override {
@@ -519,6 +532,9 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
removeAssumedBits(COMPLETION_ACTION);
+ if (isAssumed(FLAT_SCRATCH_INIT) && needFlatScratchInit(A))
+ removeAssumedBits(FLAT_SCRATCH_INIT);
+
return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
: ChangeStatus::UNCHANGED;
}
@@ -677,6 +693,33 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
UsedAssumedInformation);
}
+
+ // Returns true if FlatScratchInit is needed, i.e., no-flat-scratch-init is
+ // not to be set.
+ bool needFlatScratchInit(Attributor &A) {
+ // This is called on each callee; false means callee shouldn't have
+ // no-flat-scratch-init.
+ auto CheckForNoFlatScratchInit = [&](Instruction &I) {
+ const auto &CB = cast<CallBase>(I);
+ const Value *CalleeOp = CB.getCalledOperand();
+ const Function *Callee = dyn_cast<Function>(CalleeOp);
+ if (!Callee) // indirect call
+ return CB.isInlineAsm();
+
+ if (Callee->isIntrinsic())
+ return true;
+
+ const auto *CalleeInfo = A.getAAFor<AAAMDAttributes>(
+ *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
+ return CalleeInfo && CalleeInfo->isAssumed(FLAT_SCRATCH_INIT);
+ };
+
+ bool UsedAssumedInformation = false;
+ // If any callee is false (i.e. need FlatScratchInit),
+ // checkForAllCallLikeInstructions returns false
+ return !A.checkForAllCallLikeInstructions(CheckForNoFlatScratchInit, *this,
+ UsedAssumedInformation);
+ }
};
AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 94ee4ac78142d..511e711bf724d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -1040,12 +1040,8 @@ GCNUserSGPRUsageInfo::GCNUserSGPRUsageInfo(const Function &F,
const CallingConv::ID CC = F.getCallingConv();
const bool IsKernel =
CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL;
- // FIXME: Should have analysis or something rather than attribute to detect
- // calls.
- const bool HasCalls = F.hasFnAttribute("amdgpu-calls");
- // FIXME: This attribute is a hack, we just need an analysis on the function
- // to look for allocas.
- const bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
+ const bool NoFlatScratchInit =
+ F.hasFnAttribute("amdgpu-no-flat-scratch-init");
if (IsKernel && (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0))
KernargSegmentPtr = true;
@@ -1073,7 +1069,7 @@ GCNUserSGPRUsageInfo::GCNUserSGPRUsageInfo(const Function &F,
// lowering.
if (ST.hasFlatAddressSpace() && AMDGPU::isEntryFunctionCC(CC) &&
(IsAmdHsaOrMesa || ST.enableFlatScratch()) &&
- (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
+ (!NoFlatScratchInit || ST.enableFlatScratch()) &&
!ST.flatScratchIsArchitected()) {
FlatScratchInit = true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll
index 8859ac69923a9..74ba17d4bf59d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll
@@ -12,7 +12,9 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX8V4-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x40
-; GFX8V4-NEXT: v_mov_b32_e32 v2, 1
+; GFX8V4-NEXT: s_add_i32 s8, s8, s11
+; GFX8V4-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
+; GFX8V4-NEXT: s_mov_b32 flat_scratch_lo, s9
; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V4-NEXT: s_mov_b32 s4, s0
; GFX8V4-NEXT: s_mov_b32 s5, s3
@@ -23,6 +25,7 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
; GFX8V4-NEXT: s_cmp_lg_u32 s1, -1
; GFX8V4-NEXT: v_mov_b32_e32 v0, s4
; GFX8V4-NEXT: s_cselect_b64 s[0:1], s[6:7], 0
+; GFX8V4-NEXT: v_mov_b32_e32 v2, 1
; GFX8V4-NEXT: v_mov_b32_e32 v1, s5
; GFX8V4-NEXT: flat_store_dword v[0:1], v2
; GFX8V4-NEXT: s_waitcnt vmcnt(0)
@@ -37,7 +40,9 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
; GFX8V5: ; %bb.0:
; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX8V5-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0xc8
-; GFX8V5-NEXT: v_mov_b32_e32 v2, 1
+; GFX8V5-NEXT: s_add_i32 s6, s6, s9
+; GFX8V5-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
+; GFX8V5-NEXT: s_mov_b32 flat_scratch_lo, s7
; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V5-NEXT: s_mov_b32 s4, s0
; GFX8V5-NEXT: s_mov_b32 s5, s2
@@ -47,6 +52,7 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
; GFX8V5-NEXT: s_cmp_lg_u32 s1, -1
; GFX8V5-NEXT: v_mov_b32_e32 v0, s4
; GFX8V5-NEXT: s_cselect_b64 s[0:1], s[2:3], 0
+; GFX8V5-NEXT: v_mov_b32_e32 v2, 1
; GFX8V5-NEXT: v_mov_b32_e32 v1, s5
; GFX8V5-NEXT: flat_store_dword v[0:1], v2
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
@@ -60,9 +66,10 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
; GFX9V4-LABEL: addrspacecast:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9V4-NEXT: s_add_u32 flat_scratch_lo, s6, s9
+; GFX9V4-NEXT: s_addc_u32 flat_scratch_hi, s7, 0
; GFX9V4-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX9V4-NEXT: s_mov_b64 s[4:5], src_shared_base
-; GFX9V4-NEXT: v_mov_b32_e32 v2, 1
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT: s_mov_b32 s2, s0
; GFX9V4-NEXT: s_cmp_lg_u32 s0, -1
@@ -71,6 +78,7 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
; GFX9V4-NEXT: s_cmp_lg_u32 s1, -1
; GFX9V4-NEXT: v_mov_b32_e32 v0, s2
; GFX9V4-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
+; GFX9V4-NEXT: v_mov_b32_e32 v2, 1
; GFX9V4-NEXT: v_mov_b32_e32 v1, s3
; GFX9V4-NEXT: flat_store_dword v[0:1], v2
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
@@ -84,9 +92,10 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
; GFX9V5-LABEL: addrspacecast:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9V5-NEXT: s_add_u32 flat_scratch_lo, s6, s9
+; GFX9V5-NEXT: s_addc_u32 flat_scratch_hi, s7, 0
; GFX9V5-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX9V5-NEXT: s_mov_b64 s[4:5], src_shared_base
-; GFX9V5-NEXT: v_mov_b32_e32 v2, 1
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT: s_mov_b32 s2, s0
; GFX9V5-NEXT: s_cmp_lg_u32 s0, -1
@@ -95,6 +104,7 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
; GFX9V5-NEXT: s_cmp_lg_u32 s1, -1
; GFX9V5-NEXT: v_mov_b32_e32 v0, s2
; GFX9V5-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
+; GFX9V5-NEXT: v_mov_b32_e32 v2, 1
; GFX9V5-NEXT: v_mov_b32_e32 v1, s3
; GFX9V5-NEXT: flat_store_dword v[0:1], v2
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
index cff9ce0506679..96bbcb7ed2149 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
@@ -233,9 +233,9 @@ attributes #1 = { nounwind }
; AKF_HSA: attributes #[[ATTR1]] = { nounwind }
;.
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
-; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
;.
; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
index 33b1cc65dc569..5ace66fd2dd76 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
@@ -116,7 +116,7 @@ define amdgpu_kernel void @kernel_calls_extern() {
define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
; CHECK-SAME: ) #[[ATTR4]] {
-; CHECK-NEXT: call void @unknown() #[[ATTR9:[0-9]+]]
+; CHECK-NEXT: call void @unknown() #[[ATTR10:[0-9]+]]
; CHECK-NEXT: ret void
;
call void @unknown() #0
@@ -136,7 +136,7 @@ define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR4]] {
-; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR9]]
+; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR10]]
; CHECK-NEXT: ret void
;
call void %indirect() #0
@@ -229,7 +229,7 @@ define amdgpu_kernel void @kernel_calls_workitem_id_x(ptr addrspace(1) %out) {
define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
; CHECK-LABEL: define amdgpu_kernel void @indirect_calls_none_agpr(
-; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR7:[0-9]+]] {
; CHECK-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
; CHECK-NEXT: call void [[FPTR]]()
; CHECK-NEXT: ret void
@@ -242,14 +242,15 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
attributes #0 = { "amdgpu-no-agpr" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR4]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-agpr" }
+; CHECK: attributes #[[ATTR7]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR10]] = { "amdgpu-no-agpr" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kern...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/94647
More information about the llvm-commits
mailing list