[llvm-branch-commits] [llvm] [AMDGPU][Attributor] Skip update if an AA is at its initial state (PR #114726)
Shilei Tian via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sun Nov 3 16:36:16 PST 2024
https://github.com/shiltian created https://github.com/llvm/llvm-project/pull/114726
None
>From b7612eddae0b0808f82bb8bc2e6fd6e34361ae5c Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Sun, 3 Nov 2024 19:35:26 -0500
Subject: [PATCH] [AMDGPU][Attributor] Skip update if an AA is at its initial
state
---
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 105 +++++++++++++++---
.../annotate-kernel-features-hsa-call.ll | 46 ++++----
.../AMDGPU/attributor-loop-issue-58639.ll | 3 +-
.../CodeGen/AMDGPU/direct-indirect-call.ll | 3 +-
.../AMDGPU/remove-no-kernel-id-attribute.ll | 11 +-
.../AMDGPU/uniform-work-group-multistep.ll | 3 +-
.../uniform-work-group-recursion-test.ll | 2 +-
7 files changed, 118 insertions(+), 55 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 7d51412730d4d5..85500b95eec1a4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -740,6 +740,16 @@ struct AAAMDSizeRangeAttribute
if (!CallerInfo || !CallerInfo->isValidState())
return false;
+ /// When the caller AA is in its initial state, the state remains valid
+ /// but awaits propagation. We skip processing in this case. Note that we
+ /// must return true since the state is still considered valid.
+ if (CallerInfo->isAtInitialState()) {
+ LLVM_DEBUG(dbgs() << '[' << getName() << "] Caller "
+ << Caller->getName()
+ << " is still at initial state. Skip the update.\n");
+ return true;
+ }
+
Change |=
clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
@@ -784,6 +794,15 @@ struct AAAMDSizeRangeAttribute
/*ForceReplace=*/true);
}
+ /// The initial state of `IntegerRangeState` represents an empty set, which
+ /// does not constitute a valid range. This empty state complicates
+ /// propagation, particularly for arithmetic operations like
+ /// `getAssumed().getUpper() - 1`. Therefore, it is recommended to skip the
+ /// initial state during processing.
+ bool isAtInitialState() const {
+ return isValidState() && getAssumed().isEmptySet();
+ }
+
const std::string getAsStr(Attributor *) const override {
std::string Str;
raw_string_ostream OS(Str);
@@ -840,6 +859,11 @@ struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
Attributor &A);
ChangeStatus manifest(Attributor &A) override {
+ if (isAtInitialState()) {
+ LLVM_DEBUG(dbgs() << '[' << getName()
+ << "] Still at initial state. No manifest.\n";);
+ return ChangeStatus::UNCHANGED;
+ }
Function *F = getAssociatedFunction();
auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
return emitAttributeIfNotDefaultAfterClamp(
@@ -927,31 +951,71 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
ChangeStatus Change = ChangeStatus::UNCHANGED;
+ Function *F = getAssociatedFunction();
+
+ const auto *AAFlatWorkGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
+ *this, IRPosition::function(*F), DepClassTy::REQUIRED);
+ if (!AAFlatWorkGroupSize || !AAFlatWorkGroupSize->isValidState()) {
+ LLVM_DEBUG(
+ dbgs() << '[' << getName()
+ << "] AAAMDFlatWorkGroupSize is unavailable or invalid.\n");
+ return ChangeStatus::UNCHANGED;
+ }
+
+ if (AAFlatWorkGroupSize->isAtInitialState()) {
+ LLVM_DEBUG(dbgs() << '[' << getName()
+ << "] AAAMDFlatWorkGroupSize is still at initial "
+ "state. Skip the update.\n");
+ return ChangeStatus::UNCHANGED;
+ }
+
+ auto CurrentWorkGroupSize = std::make_pair(
+ AAFlatWorkGroupSize->getAssumed().getLower().getZExtValue(),
+ AAFlatWorkGroupSize->getAssumed().getUpper().getZExtValue() - 1);
+
+ auto DoUpdate = [&](std::pair<unsigned, unsigned> WavesPerEU,
+ std::pair<unsigned, unsigned> FlatWorkGroupSize) {
+ auto [Min, Max] =
+ InfoCache.getEffectiveWavesPerEU(*F, WavesPerEU, FlatWorkGroupSize);
+ ConstantRange CR(APInt(32, Min), APInt(32, Max + 1));
+ IntegerRangeState IRS(CR);
+ Change |= clampStateAndIndicateChange(this->getState(), IRS);
+ };
+
+ // // We need to clamp once if we are not at initial state, because
+ // // AAAMDFlatWorkGroupSize could be updated in last iteration.
+ if (!isAtInitialState()) {
+ auto CurrentWavesPerEU =
+ std::make_pair(getAssumed().getLower().getZExtValue(),
+ getAssumed().getUpper().getZExtValue() - 1);
+ DoUpdate(CurrentWavesPerEU, CurrentWorkGroupSize);
+ }
+
auto CheckCallSite = [&](AbstractCallSite CS) {
Function *Caller = CS.getInstruction()->getFunction();
- Function *Func = getAssociatedFunction();
+
LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
- << "->" << Func->getName() << '\n');
+ << "->" << F->getName() << '\n');
- const auto *CallerInfo = A.getAAFor<AAAMDWavesPerEU>(
+ const auto *AAWavesPerEU = A.getAAFor<AAAMDWavesPerEU>(
*this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
- const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
- *this, IRPosition::function(*Func), DepClassTy::REQUIRED);
- if (!CallerInfo || !AssumedGroupSize || !CallerInfo->isValidState() ||
- !AssumedGroupSize->isValidState())
+ if (!AAWavesPerEU || !AAWavesPerEU->isValidState()) {
+ LLVM_DEBUG(dbgs() << '[' << getName() << "] Caller "
+ << Caller->getName()
+ << " is unavailable or invalid.\n");
return false;
+ }
+ if (AAWavesPerEU->isAtInitialState()) {
+ LLVM_DEBUG(dbgs() << '[' << getName() << "] Caller "
+ << Caller->getName()
+ << " is still at initial state. Skip the update.\n");
+ return true;
+ }
- unsigned Min, Max;
- std::tie(Min, Max) = InfoCache.getEffectiveWavesPerEU(
- *Caller,
- {CallerInfo->getAssumed().getLower().getZExtValue(),
- CallerInfo->getAssumed().getUpper().getZExtValue() - 1},
- {AssumedGroupSize->getAssumed().getLower().getZExtValue(),
- AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});
- ConstantRange CallerRange(APInt(32, Min), APInt(32, Max + 1));
- IntegerRangeState CallerRangeState(CallerRange);
- Change |= clampStateAndIndicateChange(this->getState(), CallerRangeState);
-
+ auto CallerWavesPerEU = std::make_pair(
+ AAWavesPerEU->getAssumed().getLower().getZExtValue(),
+ AAWavesPerEU->getAssumed().getUpper().getZExtValue() - 1);
+ DoUpdate(CallerWavesPerEU, CurrentWorkGroupSize);
return true;
};
@@ -967,6 +1031,11 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
Attributor &A);
ChangeStatus manifest(Attributor &A) override {
+ if (isAtInitialState()) {
+ LLVM_DEBUG(dbgs() << '[' << getName()
+ << "] Still at initial state. No manifest.\n";);
+ return ChangeStatus::UNCHANGED;
+ }
Function *F = getAssociatedFunction();
auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
return emitAttributeIfNotDefaultAfterClamp(
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
index 773ba4c8df673d..6d1842167824b1 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -688,7 +688,7 @@ define void @func_call_asm() #3 {
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_asm
; ATTRIBUTOR_HSA-SAME: () #[[ATTR16]] {
-; ATTRIBUTOR_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR26:[0-9]+]]
+; ATTRIBUTOR_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR24:[0-9]+]]
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void asm sideeffect "", ""() #3
@@ -717,7 +717,7 @@ define amdgpu_kernel void @func_kern_defined() #3 {
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_kern_defined
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR17:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR16]] {
; ATTRIBUTOR_HSA-NEXT: call void @defined.func()
; ATTRIBUTOR_HSA-NEXT: ret void
;
@@ -845,7 +845,7 @@ define amdgpu_kernel void @kern_sanitize_address() #4 {
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_sanitize_address
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR18:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR17:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(1) null, align 4
; ATTRIBUTOR_HSA-NEXT: ret void
;
@@ -861,7 +861,7 @@ define void @func_sanitize_address() #4 {
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_sanitize_address
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR18]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR17]] {
; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(1) null, align 4
; ATTRIBUTOR_HSA-NEXT: ret void
;
@@ -877,7 +877,7 @@ define void @func_indirect_sanitize_address() #3 {
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_sanitize_address
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR18:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: call void @func_sanitize_address()
; ATTRIBUTOR_HSA-NEXT: ret void
;
@@ -893,7 +893,7 @@ define amdgpu_kernel void @kern_indirect_sanitize_address() #3 {
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_indirect_sanitize_address
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR18]] {
; ATTRIBUTOR_HSA-NEXT: call void @func_sanitize_address()
; ATTRIBUTOR_HSA-NEXT: ret void
;
@@ -928,7 +928,7 @@ define internal void @enqueue_block_def() #6 {
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@enqueue_block_def
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR21:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: ret void
;
ret void
@@ -941,7 +941,7 @@ define amdgpu_kernel void @kern_call_enqueued_block_decl() {
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_decl
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR23:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: call void @enqueue_block_decl()
; ATTRIBUTOR_HSA-NEXT: ret void
;
@@ -956,7 +956,7 @@ define amdgpu_kernel void @kern_call_enqueued_block_def() {
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_def
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR24:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR23:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: call void @enqueue_block_def()
; ATTRIBUTOR_HSA-NEXT: ret void
;
@@ -969,7 +969,7 @@ define void @unused_enqueue_block() {
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@unused_enqueue_block
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR25:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR23]] {
; ATTRIBUTOR_HSA-NEXT: ret void
;
ret void
@@ -980,7 +980,7 @@ define internal void @known_func() {
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@known_func
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR25]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR23]] {
; ATTRIBUTOR_HSA-NEXT: ret void
;
ret void
@@ -994,8 +994,8 @@ define amdgpu_kernel void @kern_callsite_enqueue_block() {
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_callsite_enqueue_block
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR24]] {
-; ATTRIBUTOR_HSA-NEXT: call void @known_func() #[[ATTR27:[0-9]+]]
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR23]] {
+; ATTRIBUTOR_HSA-NEXT: call void @known_func() #[[ATTR25:[0-9]+]]
; ATTRIBUTOR_HSA-NEXT: ret void
;
call void @known_func() #6
@@ -1041,17 +1041,15 @@ attributes #6 = { "enqueued-block" }
; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR15]] = { nounwind "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind sanitize_address "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR19]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR20:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR21:[0-9]+]] = { "enqueued-block" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "enqueued-block" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR23]] = { "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR24]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR25]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR26]] = { nounwind }
-; ATTRIBUTOR_HSA: attributes #[[ATTR27]] = { "enqueued-block" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR19:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR20:[0-9]+]] = { "enqueued-block" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "enqueued-block" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR23]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR24]] = { nounwind }
+; ATTRIBUTOR_HSA: attributes #[[ATTR25]] = { "enqueued-block" }
;.
; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
;.
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll b/llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll
index 8481cea4d7c353..a9efcdcb0af6d0 100644
--- a/llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll
+++ b/llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll
@@ -51,7 +51,7 @@ bb5: ; preds = %bb5, %bb3
define amdgpu_kernel void @entry() {
; CHECK-LABEL: define {{[^@]+}}@entry
-; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [[TMP0:%.*]], align 8, addrspace(5)
; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
; CHECK-NEXT: [[ARST:%.*]] = call double @baz(ptr [[CAST]])
@@ -64,5 +64,4 @@ define amdgpu_kernel void @entry() {
}
;.
; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
index d9be8cbc73a5e9..145d730d356537 100644
--- a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
@@ -27,7 +27,7 @@ define internal void @direct() {
define amdgpu_kernel void @test_direct_indirect_call() {
; CHECK-LABEL: define {{[^@]+}}@test_direct_indirect_call
-; CHECK-SAME: () #[[ATTR2:[0-9]+]] {
+; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: call void @direct()
; CHECK-NEXT: ret void
;
@@ -37,5 +37,4 @@ define amdgpu_kernel void @test_direct_indirect_call() {
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll b/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll
index b3a0649bde0922..f0bc5a8e0ebc93 100644
--- a/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll
+++ b/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll
@@ -12,7 +12,7 @@
; CHECK: @llvm.amdgcn.kernel.k1_f0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1_f0.lds.t poison, align 2, !absolute_symbol [[META0]]
; CHECK: @llvm.amdgcn.kernel.kernel_lds.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_lds.lds.t poison, align 2, !absolute_symbol [[META0]]
; CHECK: @llvm.amdgcn.kernel.kernel_lds_recursion.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_lds_recursion.lds.t poison, align 2, !absolute_symbol [[META0]]
-; CHECK: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [3 x [2 x i32]]
+; CHECK: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [3 x [2 x i32]] [[2 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k0_f0.lds to i32), i32 poison], [2 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k1_f0.lds to i32), i32 ptrtoint (ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1_f0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1_f0.lds, i32 0, i32 1) to i32)], [2 x i32] [i32 poison, i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.kernel_lds_recursion.lds to i32)]]
;.
define internal void @lds_use_through_indirect() {
; CHECK-LABEL: define internal void @lds_use_through_indirect(
@@ -178,7 +178,7 @@ define internal void @mutual_recursion_1(i16 %arg) {
define amdgpu_kernel void @kernel_lds_recursion() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_lds_recursion(
-; CHECK-SAME: ) #[[ATTR5:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META9:![0-9]+]] {
+; CHECK-SAME: ) #[[ATTR2]] !llvm.amdgcn.lds.kernel.id [[META9:![0-9]+]] {
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kernel_lds_recursion.lds) ]
; CHECK-NEXT: call void @mutual_recursion_0(i16 0)
; CHECK-NEXT: ret void
@@ -194,11 +194,10 @@ define amdgpu_kernel void @kernel_lds_recursion() {
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR2]] = { "amdgpu-lds-size"="2" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR3]] = { "amdgpu-lds-size"="4" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR3]] = { "amdgpu-lds-size"="4" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR4]] = { "amdgpu-lds-size"="2" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR5]] = { "amdgpu-lds-size"="2" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
-; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
;.
; CHECK: [[META0]] = !{i32 0, i32 1}
; CHECK: [[META1:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
index c75ce90684c315..a12faf715f5b19 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
@@ -87,7 +87,7 @@ define internal void @internal2() {
define amdgpu_kernel void @kernel2() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel2
-; CHECK-SAME: () #[[ATTR4:[0-9]+]] {
+; CHECK-SAME: () #[[ATTR3]] {
; CHECK-NEXT: call void @internal2()
; CHECK-NEXT: ret void
;
@@ -101,5 +101,4 @@ attributes #0 = { "uniform-work-group-size"="true" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR2]] = { "uniform-work-group-size"="true" }
; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
-; CHECK: attributes #[[ATTR4]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="true" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
index 52c0a258c4aab2..cd231be85221f4 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
@@ -103,5 +103,5 @@ attributes #1 = { "uniform-work-group-size"="true" }
;.
; CHECK: attributes #[[ATTR0]] = { nounwind memory(none) "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { nounwind memory(none) "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;.
More information about the llvm-branch-commits
mailing list