[llvm] [AMDGPU] Infer amdgpu-no-flat-scratch-init attribute in AMDGPUAttributor (PR #94647)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 6 10:54:54 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-llvm-globalisel

Author: Jun Wang (jwanggit86)

<details>
<summary>Changes</summary>

The AMDGPUAnnotateKernelFeatures pass infers the "amdgpu-calls" and "amdgpu-stack-objects" attributes, which are used to infer whether we need to initialize flat scratch. This is, however, not precise. Instead, we should use AMDGPUAttributor and infer amdgpu-no-flat-scratch-init on kernels. Refer to https://github.com/llvm/llvm-project/issues/63586 .

---

Patch is 1.65 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/94647.diff


56 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUAttributes.def (+1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp (+43) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (+3-7) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll (+14-4) 
- (modified) llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll (+3-3) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll (+11-10) 
- (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll (+58-54) 
- (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll (+25-23) 
- (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll (+9-9) 
- (added) llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-globalisel.ll (+1028) 
- (added) llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll (+914) 
- (modified) llvm/test/CodeGen/AMDGPU/attributor-noopt.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll (+18-18) 
- (modified) llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/flat-address-space.ll (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll (+15-4) 
- (modified) llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll (+15-15) 
- (modified) llvm/test/CodeGen/AMDGPU/ipra.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll (+24-72) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.kernel.id.ll (+7-10) 
- (modified) llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll (+3-12) 
- (modified) llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll (+3-12) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll (+1380) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll (+75) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll (+1380) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll (+1380) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll (+66) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll (+1365) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll (+1320) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll (+273) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll (+15) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll (+276) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll (+261) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll (+18) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll (+276) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll (+276) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll (+9) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll (+6) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll (+34-25) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll (+18-12) 
- (modified) llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll (+9-9) 
- (modified) llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll (+22-22) 
- (modified) llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll (+3-3) 
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll (+3-3) 
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll (+1-1) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def b/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
index bacc8e4e821e5..8c1c8219690ba 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
@@ -30,5 +30,6 @@ AMDGPU_ATTRIBUTE(WORKITEM_ID_Z, "amdgpu-no-workitem-id-z")
 AMDGPU_ATTRIBUTE(LDS_KERNEL_ID, "amdgpu-no-lds-kernel-id")
 AMDGPU_ATTRIBUTE(DEFAULT_QUEUE, "amdgpu-no-default-queue")
 AMDGPU_ATTRIBUTE(COMPLETION_ACTION, "amdgpu-no-completion-action")
+AMDGPU_ATTRIBUTE(FLAT_SCRATCH_INIT, "amdgpu-no-flat-scratch-init")
 
 #undef AMDGPU_ATTRIBUTE
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 43bfd0f13f875..8bdc9eab577a9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -433,6 +433,19 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
       indicatePessimisticFixpoint();
       return;
     }
+
+    bool HasAllocaOrASCast = false;
+    for (BasicBlock &BB : *F) {
+      for (Instruction &I : BB) {
+        if (isa<AllocaInst>(I) || isa<AddrSpaceCastInst>(I)) {
+          HasAllocaOrASCast = true;
+          removeAssumedBits(FLAT_SCRATCH_INIT);
+          break;
+        }
+      }
+      if (HasAllocaOrASCast)
+        break;
+    }
   }
 
   ChangeStatus updateImpl(Attributor &A) override {
@@ -519,6 +532,9 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
     if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
       removeAssumedBits(COMPLETION_ACTION);
 
+    if (isAssumed(FLAT_SCRATCH_INIT) && needFlatScratchInit(A))
+      removeAssumedBits(FLAT_SCRATCH_INIT);
+
     return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                        : ChangeStatus::UNCHANGED;
   }
@@ -677,6 +693,33 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
     return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
                                               UsedAssumedInformation);
   }
+
+  // Returns true if FlatScratchInit is needed, i.e., no-flat-scratch-init is
+  // not to be set.
+  bool needFlatScratchInit(Attributor &A) {
+    // This is called on each callee; false means callee shouldn't have
+    // no-flat-scratch-init.
+    auto CheckForNoFlatScratchInit = [&](Instruction &I) {
+      const auto &CB = cast<CallBase>(I);
+      const Value *CalleeOp = CB.getCalledOperand();
+      const Function *Callee = dyn_cast<Function>(CalleeOp);
+      if (!Callee) // indirect call
+        return CB.isInlineAsm();
+
+      if (Callee->isIntrinsic())
+        return true;
+
+      const auto *CalleeInfo = A.getAAFor<AAAMDAttributes>(
+          *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
+      return CalleeInfo && CalleeInfo->isAssumed(FLAT_SCRATCH_INIT);
+    };
+
+    bool UsedAssumedInformation = false;
+    // If any callee is false (i.e. need FlatScratchInit),
+    // checkForAllCallLikeInstructions returns false
+    return !A.checkForAllCallLikeInstructions(CheckForNoFlatScratchInit, *this,
+                                              UsedAssumedInformation);
+  }
 };
 
 AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 94ee4ac78142d..511e711bf724d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -1040,12 +1040,8 @@ GCNUserSGPRUsageInfo::GCNUserSGPRUsageInfo(const Function &F,
   const CallingConv::ID CC = F.getCallingConv();
   const bool IsKernel =
       CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL;
-  // FIXME: Should have analysis or something rather than attribute to detect
-  // calls.
-  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");
-  // FIXME: This attribute is a hack, we just need an analysis on the function
-  // to look for allocas.
-  const bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
+  const bool NoFlatScratchInit =
+      F.hasFnAttribute("amdgpu-no-flat-scratch-init");
 
   if (IsKernel && (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0))
     KernargSegmentPtr = true;
@@ -1073,7 +1069,7 @@ GCNUserSGPRUsageInfo::GCNUserSGPRUsageInfo(const Function &F,
   // lowering.
   if (ST.hasFlatAddressSpace() && AMDGPU::isEntryFunctionCC(CC) &&
       (IsAmdHsaOrMesa || ST.enableFlatScratch()) &&
-      (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
+      (!NoFlatScratchInit || ST.enableFlatScratch()) &&
       !ST.flatScratchIsArchitected()) {
     FlatScratchInit = true;
   }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll
index 8859ac69923a9..74ba17d4bf59d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll
@@ -12,7 +12,9 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
 ; GFX8V4:       ; %bb.0:
 ; GFX8V4-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
 ; GFX8V4-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x40
-; GFX8V4-NEXT:    v_mov_b32_e32 v2, 1
+; GFX8V4-NEXT:    s_add_i32 s8, s8, s11
+; GFX8V4-NEXT:    s_lshr_b32 flat_scratch_hi, s8, 8
+; GFX8V4-NEXT:    s_mov_b32 flat_scratch_lo, s9
 ; GFX8V4-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8V4-NEXT:    s_mov_b32 s4, s0
 ; GFX8V4-NEXT:    s_mov_b32 s5, s3
@@ -23,6 +25,7 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
 ; GFX8V4-NEXT:    s_cmp_lg_u32 s1, -1
 ; GFX8V4-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX8V4-NEXT:    s_cselect_b64 s[0:1], s[6:7], 0
+; GFX8V4-NEXT:    v_mov_b32_e32 v2, 1
 ; GFX8V4-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX8V4-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8V4-NEXT:    s_waitcnt vmcnt(0)
@@ -37,7 +40,9 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
 ; GFX8V5:       ; %bb.0:
 ; GFX8V5-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
 ; GFX8V5-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0xc8
-; GFX8V5-NEXT:    v_mov_b32_e32 v2, 1
+; GFX8V5-NEXT:    s_add_i32 s6, s6, s9
+; GFX8V5-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
+; GFX8V5-NEXT:    s_mov_b32 flat_scratch_lo, s7
 ; GFX8V5-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8V5-NEXT:    s_mov_b32 s4, s0
 ; GFX8V5-NEXT:    s_mov_b32 s5, s2
@@ -47,6 +52,7 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
 ; GFX8V5-NEXT:    s_cmp_lg_u32 s1, -1
 ; GFX8V5-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX8V5-NEXT:    s_cselect_b64 s[0:1], s[2:3], 0
+; GFX8V5-NEXT:    v_mov_b32_e32 v2, 1
 ; GFX8V5-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX8V5-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8V5-NEXT:    s_waitcnt vmcnt(0)
@@ -60,9 +66,10 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
 ; GFX9V4-LABEL: addrspacecast:
 ; GFX9V4:       ; %bb.0:
 ; GFX9V4-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9V4-NEXT:    s_add_u32 flat_scratch_lo, s6, s9
+; GFX9V4-NEXT:    s_addc_u32 flat_scratch_hi, s7, 0
 ; GFX9V4-NEXT:    s_mov_b64 s[2:3], src_private_base
 ; GFX9V4-NEXT:    s_mov_b64 s[4:5], src_shared_base
-; GFX9V4-NEXT:    v_mov_b32_e32 v2, 1
 ; GFX9V4-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9V4-NEXT:    s_mov_b32 s2, s0
 ; GFX9V4-NEXT:    s_cmp_lg_u32 s0, -1
@@ -71,6 +78,7 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
 ; GFX9V4-NEXT:    s_cmp_lg_u32 s1, -1
 ; GFX9V4-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX9V4-NEXT:    s_cselect_b64 s[0:1], s[4:5], 0
+; GFX9V4-NEXT:    v_mov_b32_e32 v2, 1
 ; GFX9V4-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX9V4-NEXT:    flat_store_dword v[0:1], v2
 ; GFX9V4-NEXT:    s_waitcnt vmcnt(0)
@@ -84,9 +92,10 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
 ; GFX9V5-LABEL: addrspacecast:
 ; GFX9V5:       ; %bb.0:
 ; GFX9V5-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9V5-NEXT:    s_add_u32 flat_scratch_lo, s6, s9
+; GFX9V5-NEXT:    s_addc_u32 flat_scratch_hi, s7, 0
 ; GFX9V5-NEXT:    s_mov_b64 s[2:3], src_private_base
 ; GFX9V5-NEXT:    s_mov_b64 s[4:5], src_shared_base
-; GFX9V5-NEXT:    v_mov_b32_e32 v2, 1
 ; GFX9V5-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9V5-NEXT:    s_mov_b32 s2, s0
 ; GFX9V5-NEXT:    s_cmp_lg_u32 s0, -1
@@ -95,6 +104,7 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
 ; GFX9V5-NEXT:    s_cmp_lg_u32 s1, -1
 ; GFX9V5-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX9V5-NEXT:    s_cselect_b64 s[0:1], s[4:5], 0
+; GFX9V5-NEXT:    v_mov_b32_e32 v2, 1
 ; GFX9V5-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX9V5-NEXT:    flat_store_dword v[0:1], v2
 ; GFX9V5-NEXT:    s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
index cff9ce0506679..96bbcb7ed2149 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
@@ -233,9 +233,9 @@ attributes #1 = { nounwind }
 ; AKF_HSA: attributes #[[ATTR1]] = { nounwind }
 ;.
 ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
-; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
 ;.
 ; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
index 33b1cc65dc569..5ace66fd2dd76 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
@@ -116,7 +116,7 @@ define amdgpu_kernel void @kernel_calls_extern() {
 define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
 ; CHECK-SAME: ) #[[ATTR4]] {
-; CHECK-NEXT:    call void @unknown() #[[ATTR9:[0-9]+]]
+; CHECK-NEXT:    call void @unknown() #[[ATTR10:[0-9]+]]
 ; CHECK-NEXT:    ret void
 ;
   call void @unknown() #0
@@ -136,7 +136,7 @@ define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
 define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
 ; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR4]] {
-; CHECK-NEXT:    call void [[INDIRECT]]() #[[ATTR9]]
+; CHECK-NEXT:    call void [[INDIRECT]]() #[[ATTR10]]
 ; CHECK-NEXT:    ret void
 ;
   call void %indirect() #0
@@ -229,7 +229,7 @@ define amdgpu_kernel void @kernel_calls_workitem_id_x(ptr addrspace(1) %out) {
 
 define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
 ; CHECK-LABEL: define amdgpu_kernel void @indirect_calls_none_agpr(
-; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR7:[0-9]+]] {
 ; CHECK-NEXT:    [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
 ; CHECK-NEXT:    call void [[FPTR]]()
 ; CHECK-NEXT:    ret void
@@ -242,14 +242,15 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
 
 attributes #0 = { "amdgpu-no-agpr" }
 ;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
 ; CHECK: attributes #[[ATTR3:[0-9]+]] = { "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
 ; CHECK: attributes #[[ATTR4]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
 ; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-agpr" }
+; CHECK: attributes #[[ATTR7]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR10]] = { "amdgpu-no-agpr" }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kern...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/94647


More information about the llvm-commits mailing list