[llvm] pr/amdgpu closed world (PR #66488)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 15 03:43:35 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
<details>
<summary>Changes</summary>
None
--
Patch is 241.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/66488.diff
14 Files Affected:
- (modified) llvm/include/llvm/Transforms/IPO/Attributor.h (+5-4)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp (+32-1)
- (modified) llvm/lib/Transforms/IPO/Attributor.cpp (+1-1)
- (modified) llvm/lib/Transforms/IPO/AttributorAttributes.cpp (+2-1)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll (+22-14)
- (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll (+74-25)
- (modified) llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll (+43-22)
- (modified) llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll (+30-14)
- (modified) llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll (+22-10)
- (modified) llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll (+17-9)
- (modified) llvm/test/CodeGen/AMDGPU/indirect-call.ll (+2189-1820)
- (modified) llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll (+44-21)
- (modified) llvm/test/CodeGen/AMDGPU/sibling-call.ll (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll (+36-12)
<pre>
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index bd1bd8261123e51..f266620b65ca1fe 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -1435,7 +1435,7 @@ struct AttributorConfig {
/// Callback function to determine if an indirect call targets should be made
/// direct call targets (with an if-cascade).
std::function<bool(Attributor &A, const AbstractAttribute &AA, CallBase &CB,
- Function &AssummedCallee)>
+ Function &AssummedCallee, unsigned NumCallees)>
IndirectCalleeSpecializationCallback = nullptr;
/// Helper to update an underlying call graph and to delete functions.
@@ -1717,10 +1717,11 @@ struct Attributor {
/// Return true if we should specialize the call site \b CB for the potential
/// callee \p Fn.
bool shouldSpecializeCallSiteForCallee(const AbstractAttribute &AA,
- CallBase &CB, Function &Callee) {
+ CallBase &CB, Function &Callee,
+ unsigned NumCallees) {
return Configuration.IndirectCalleeSpecializationCallback
- ? Configuration.IndirectCalleeSpecializationCallback(*this, AA,
- CB, Callee)
+ ? Configuration.IndirectCalleeSpecializationCallback(
+ *this, AA, CB, Callee, NumCallees)
: true;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 57c873f00a4a195..fb203c9e4006426 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -14,11 +14,15 @@
#include "GCNSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/CycleAnalysis.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"
+#include <optional>
#define DEBUG_TYPE "amdgpu-attributor"
@@ -944,16 +948,29 @@ class AMDGPUAttributor : public ModulePass {
{&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
&AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
&AAAMDWavesPerEU::ID, &AACallEdges::ID, &AAPointerInfo::ID,
- &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID});
+ &AAIndirectCallInfo::ID, &AAPotentialConstantValues::ID,
+ &AAUnderlyingObjects::ID});
AttributorConfig AC(CGUpdater);
AC.Allowed = &Allowed;
AC.IsModulePass = true;
AC.DefaultInitializeLiveInternals = false;
+ AC.IsClosedWorldModule = true;
AC.IPOAmendableCB = [](const Function &F) {
return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
};
+ // Callback to determine if we should specialize a indirect call site with a
+ // specific callee. It's effectively a heuristic and we can add checks for
+ // the callee size, PGO, etc. For now, we check for single potential callees
+ // and kernel arguments as they are known uniform values.
+ AC.IndirectCalleeSpecializationCallback =
+ [&](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
+ Function &Callee, unsigned NumCallees) {
+ return indirectCalleeSpecializationCallback(A, AA, CB, Callee,
+ NumCallees);
+ };
+
Attributor A(Functions, InfoCache, AC);
for (Function &F : M) {
@@ -975,6 +992,20 @@ class AMDGPUAttributor : public ModulePass {
AU.addRequired<CycleInfoWrapperPass>();
}
+ /// Helper to decide if we should specialize the indirect \p CB for \p Callee,
+ /// which is one of the \p NumCallees potential callees.
+ bool indirectCalleeSpecializationCallback(Attributor &A,
+ const AbstractAttribute &AA,
+ CallBase &CB, Function &Callee,
+ unsigned NumCallees) {
+ // Singleton functions should be specialized.
+ if (NumCallees == 1)
+ return true;
+ // Otherewise specialize uniform values.
+ const auto &TTI = TM->getTargetTransformInfo(*CB.getCaller());
+ return TTI.isAlwaysUniform(CB.getCalledOperand());
+ }
+
StringRef getPassName() const override { return "AMDGPU Attributor"; }
TargetMachine *TM;
static char ID;
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 1ffafc65ba63a4f..5b5a9a28f6d3838 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -3821,7 +3821,7 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache,
if (MaxSpecializationPerCB.getNumOccurrences()) {
AC.IndirectCalleeSpecializationCallback =
[&](Attributor &, const AbstractAttribute &AA, CallBase &CB,
- Function &Callee) {
+ Function &Callee, unsigned NumCallees) {
if (MaxSpecializationPerCB == 0)
return false;
auto &Set = IndirectCalleeTrackingMap[&CB];
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 03b5dc3899ac8f8..86c6bb04368e241 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12352,7 +12352,8 @@ struct AAIndirectCallInfoCallSite : public AAIndirectCallInfo {
SmallVector<Function *, 8> SkippedAssumedCallees;
SmallVector<std::pair<CallInst *, Instruction *>> NewCalls;
for (Function *NewCallee : AssumedCallees) {
- if (!A.shouldSpecializeCallSiteForCallee(*this, *CB, *NewCallee)) {
+ if (!A.shouldSpecializeCallSiteForCallee(*this, *CB, *NewCallee,
+ AssumedCallees.size())) {
SkippedAssumedCallees.push_back(NewCallee);
SpecializedForAllCallees = false;
continue;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
index 4eba84f61c2d8a3..0bb9a58c29ae718 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s
+; RUN: llc -global-isel -stop-after=irtranslator -attributor-assume-closed-world=false -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope --check-prefixes=SAMEC,CHECK %s
+; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope --check-prefixes=SAMEC,CWRLD %s
define amdgpu_kernel void @test_indirect_call_sgpr_ptr(ptr %fptr) {
; CHECK-LABEL: name: test_indirect_call_sgpr_ptr
@@ -52,24 +53,31 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(ptr %fptr) {
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[LOAD]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
+ ;
+ ; CWRLD-LABEL: name: test_indirect_call_sgpr_ptr
+ ; CWRLD: bb.1 (%ir-block.0):
+ ; CWRLD-NEXT: liveins: $sgpr4_sgpr5
+ ; CWRLD-NEXT: {{ $}}
+ ; CWRLD-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+ ; CWRLD-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
call void %fptr()
ret void
}
define amdgpu_gfx void @test_gfx_indirect_call_sgpr_ptr(ptr %fptr) {
- ; CHECK-LABEL: name: test_gfx_indirect_call_sgpr_ptr
- ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[MV]](p0), 0, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK-NEXT: SI_RETURN
+ ; SAMEC-LABEL: name: test_gfx_indirect_call_sgpr_ptr
+ ; SAMEC: bb.1 (%ir-block.0):
+ ; SAMEC-NEXT: liveins: $vgpr0, $vgpr1
+ ; SAMEC-NEXT: {{ $}}
+ ; SAMEC-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; SAMEC-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; SAMEC-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; SAMEC-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
+ ; SAMEC-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; SAMEC-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
+ ; SAMEC-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[MV]](p0), 0, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; SAMEC-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+ ; SAMEC-NEXT: SI_RETURN
call amdgpu_gfx void %fptr()
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
index c8ac5bcf8d22242..35affc7a8b140be 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=AKF_HSA %s
-; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=ATTRIBUTOR_HSA %s
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor -attributor-assume-closed-world=false < %s | FileCheck -check-prefixes=ATTRIBUTOR_HSA,OWRLD_ATTR_HSA %s
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=ATTRIBUTOR_HSA,CWRLD_ATTR_HSA %s
; TODO: The test contains UB which is refined by the Attributor and should be removed.
@@ -18,6 +19,16 @@ declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
declare i64 @llvm.amdgcn.dispatch.id() #0
+ at G1 = global ptr poison
+ at G2 = global ptr poison
+
+;.
+; AKF_HSA: @[[G1:[a-zA-Z0-9_$"\\.-]+]] = global ptr poison
+; AKF_HSA: @[[G2:[a-zA-Z0-9_$"\\.-]+]] = global ptr poison
+;.
+; ATTRIBUTOR_HSA: @[[G1:[a-zA-Z0-9_$"\\.-]+]] = global ptr poison
+; ATTRIBUTOR_HSA: @[[G2:[a-zA-Z0-9_$"\\.-]+]] = global ptr poison
+;.
define void @use_workitem_id_x() #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@use_workitem_id_x
; AKF_HSA-SAME: () #[[ATTR1:[0-9]+]] {
@@ -766,19 +777,55 @@ define float @func_indirect_call(ptr %fptr) #3 {
; AKF_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR3]] {
; AKF_HSA-NEXT: [[F:%.*]] = call float [[FPTR]]()
; AKF_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
+; AKF_HSA-NEXT: store ptr @indirect_callee1, ptr @G1, align 8
+; AKF_HSA-NEXT: store ptr @indirect_callee2, ptr @G2, align 8
; AKF_HSA-NEXT: ret float [[FADD]]
;
-; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_call
-; ATTRIBUTOR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR16]] {
-; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float [[FPTR]]()
-; ATTRIBUTOR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
-; ATTRIBUTOR_HSA-NEXT: ret float [[FADD]]
+; OWRLD_ATTR_HSA-LABEL: define {{[^@]+}}@func_indirect_call
+; OWRLD_ATTR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR16]] {
+; OWRLD_ATTR_HSA-NEXT: [[F:%.*]] = call float [[FPTR]]()
+; OWRLD_ATTR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
+; OWRLD_ATTR_HSA-NEXT: store ptr @indirect_callee1, ptr @G1, align 8
+; OWRLD_ATTR_HSA-NEXT: store ptr @indirect_callee2, ptr @G2, align 8
+; OWRLD_ATTR_HSA-NEXT: ret float [[FADD]]
+;
+; CWRLD_ATTR_HSA-LABEL: define {{[^@]+}}@func_indirect_call
+; CWRLD_ATTR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR17]] {
+; CWRLD_ATTR_HSA-NEXT: [[F:%.*]] = call float [[FPTR]](), !callees !0
+; CWRLD_ATTR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
+; CWRLD_ATTR_HSA-NEXT: store ptr @indirect_callee1, ptr @G1, align 8
+; CWRLD_ATTR_HSA-NEXT: store ptr @indirect_callee2, ptr @G2, align 8
+; CWRLD_ATTR_HSA-NEXT: ret float [[FADD]]
;
%f = call float %fptr()
%fadd = fadd float %f, 1.0
+ store ptr @indirect_callee1, ptr @G1
+ store ptr @indirect_callee2, ptr @G2
ret float %fadd
}
+define float @indirect_callee1() {
+; AKF_HSA-LABEL: define {{[^@]+}}@indirect_callee1() {
+; AKF_HSA-NEXT: ret float 0x40091EB860000000
+;
+; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_callee1
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] {
+; ATTRIBUTOR_HSA-NEXT: ret float 0x40091EB860000000
+;
+ ret float 0x40091EB860000000
+}
+define float @indirect_callee2(float noundef %arg) {
+; AKF_HSA-LABEL: define {{[^@]+}}@indirect_callee2
+; AKF_HSA-SAME: (float noundef [[ARG:%.*]]) {
+; AKF_HSA-NEXT: ret float [[ARG]]
+;
+; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_callee2
+; ATTRIBUTOR_HSA-SAME: (float noundef [[ARG:%.*]]) #[[ATTR19]] {
+; ATTRIBUTOR_HSA-NEXT: ret float [[ARG]]
+;
+ ret float %arg
+}
+
declare float @extern() #3
define float @func_extern_call() #3 {
; AKF_HSA-LABEL: define {{[^@]+}}@func_extern_call
@@ -845,7 +892,7 @@ define amdgpu_kernel void @kern_sanitize_address() #4 {
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_sanitize_address
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR20:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(1) null, align 4
; ATTRIBUTOR_HSA-NEXT: ret void
;
@@ -861,7 +908,7 @@ define void @func_sanitize_address() #4 {
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_sanitize_address
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR20:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR21:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(1) null, align 4
; ATTRIBUTOR_HSA-NEXT: ret void
;
@@ -877,7 +924,7 @@ define void @func_indirect_sanitize_address() #3 {
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_sanitize_address
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR21:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: call void @func_sanitize_address()
; ATTRIBUTOR_HSA-NEXT: ret void
;
@@ -893,7 +940,7 @@ define amdgpu_kernel void @kern_indirect_sanitize_address() #3 {
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_indirect_sanitize_address
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR23:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: call void @func_sanitize_address()
; ATTRIBUTOR_HSA-NEXT: ret void
;
@@ -928,7 +975,7 @@ define internal void @enqueue_block_def() #6 {
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@enqueue_block_def
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR25:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR26:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: ret void
;
ret void
@@ -941,7 +988,7 @@ define amdgpu_kernel void @kern_call_enqueued_block_decl() {
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_decl
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR26:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR27:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: call void @enqueue_block_decl()
; ATTRIBUTOR_HSA-NEXT: ret void
;
@@ -956,7 +1003,7 @@ define amdgpu_kernel void @kern_call_enqueued_block_def() {
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_def
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR27:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] {
; ATTRIBUTOR_HSA-NEXT: call void @enqueue_block_def()
; ATTRIBUTOR_HSA-NEXT: ret void
;
@@ -969,7 +1016,7 @@ define void @unused_enqueue_block() {
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@unused_enqueue_block
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR27]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] {
; ATTRIBUTOR_HSA-NEXT: ret void
;
ret void
@@ -980,7 +1027,7 @@ define internal void @known_func() {
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@known_func
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR27]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] {
; ATTRIBUTOR_HSA-NEXT: ret void
;
ret void
@@ -994,7 +1041,7 @@ define amdgpu_kernel void @kern_callsite_enqueue_block() {
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_callsite_enqueue_block
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR27]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] {
; ATTRIBUTOR_HSA-NEXT: call void @known_func() #[[ATTR29:[0-9]+]]
; ATTRIBUTOR_HSA-NEXT: ret void
;
@@ -1040,15 +1087,17 @@ attributes #6 = { "enqueued-block" }
; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" &a...
<truncated>
</pre>
</details>
https://github.com/llvm/llvm-project/pull/66488
More information about the llvm-commits
mailing list