[llvm-branch-commits] [llvm] AMDGPU: Propagate amdgpu-max-num-workgroups attribute (PR #113018)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Oct 28 15:19:41 PDT 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/113018
>From 39896e6caccb02132b4c0c1a6e73f0e28dd27022 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 18 Oct 2024 23:05:51 +0400
Subject: [PATCH 1/4] AMDGPU: Propagate amdgpu-max-num-workgroups attribute
I'm not sure what the interpretation of 0 is supposed to be,
AMDGPUUsage doesn't say.
---
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 154 +++++++++++-
...ttr-amdgpu-max-num-workgroups-propagate.ll | 228 ++++++++++++++++++
2 files changed, 380 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/attr-amdgpu-max-num-workgroups-propagate.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 687a7339da379d..58b90ddc36605c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -179,6 +179,11 @@ class AMDGPUInformationCache : public InformationCache {
return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
}
+ SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) {
+ const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+ return ST.getMaxNumWorkGroups(F);
+ }
+
/// Get code object version.
unsigned getCodeObjectVersion() const {
return CodeObjectVersion;
@@ -821,6 +826,150 @@ AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
"AAAMDFlatWorkGroupSize is only valid for function position");
}
+struct TupleDecIntegerRangeState : public AbstractState {
+ DecIntegerState<uint32_t> X, Y, Z;
+
+ bool isValidState() const override {
+ return X.isValidState() && Y.isValidState() && Z.isValidState();
+ }
+
+ bool isAtFixpoint() const override {
+ return X.isAtFixpoint() && Y.isAtFixpoint() && Z.isAtFixpoint();
+ }
+
+ ChangeStatus indicateOptimisticFixpoint() override {
+ return X.indicateOptimisticFixpoint() | Y.indicateOptimisticFixpoint() |
+ Z.indicateOptimisticFixpoint();
+ }
+
+ ChangeStatus indicatePessimisticFixpoint() override {
+ return X.indicatePessimisticFixpoint() | Y.indicatePessimisticFixpoint() |
+ Z.indicatePessimisticFixpoint();
+ }
+
+ TupleDecIntegerRangeState operator^=(const TupleDecIntegerRangeState &Other) {
+ X ^= Other.X;
+ Y ^= Other.Y;
+ Z ^= Other.Z;
+ return *this;
+ }
+
+ bool operator==(const TupleDecIntegerRangeState &Other) const {
+ return X == Other.X && Y == Other.Y && Z == Other.Z;
+ }
+
+ TupleDecIntegerRangeState &getAssumed() { return *this; }
+ const TupleDecIntegerRangeState &getAssumed() const { return *this; }
+};
+
+using AAAMDMaxNumWorkgroupsState =
+ StateWrapper<TupleDecIntegerRangeState, AbstractAttribute, uint32_t>;
+
+/// Propagate amdgpu-max-num-workgroups attribute.
+struct AAAMDMaxNumWorkgroups
+ : public StateWrapper<TupleDecIntegerRangeState, AbstractAttribute> {
+ using Base = StateWrapper<TupleDecIntegerRangeState, AbstractAttribute>;
+
+ AAAMDMaxNumWorkgroups(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+
+ void initialize(Attributor &A) override {
+ Function *F = getAssociatedFunction();
+ auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
+
+ SmallVector<unsigned> MaxNumWorkgroups = InfoCache.getMaxNumWorkGroups(*F);
+
+ // FIXME: What is the interpretation of 0?
+ for (unsigned &Entry : MaxNumWorkgroups) {
+ if (Entry == 0)
+ Entry = std::numeric_limits<uint32_t>::max();
+ }
+
+ X.takeKnownMinimum(MaxNumWorkgroups[0]);
+ Y.takeKnownMinimum(MaxNumWorkgroups[1]);
+ Z.takeKnownMinimum(MaxNumWorkgroups[2]);
+
+ if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
+ indicatePessimisticFixpoint();
+ }
+
+ ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus Change = ChangeStatus::UNCHANGED;
+
+ auto CheckCallSite = [&](AbstractCallSite CS) {
+ Function *Caller = CS.getInstruction()->getFunction();
+ LLVM_DEBUG(dbgs() << "[AAAMDMaxNumWorkgroups] Call " << Caller->getName()
+ << "->" << getAssociatedFunction()->getName() << '\n');
+
+ const auto *CallerInfo = A.getAAFor<AAAMDMaxNumWorkgroups>(
+ *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
+ if (!CallerInfo)
+ return false;
+
+ Change |=
+ clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
+ return true;
+ };
+
+ bool AllCallSitesKnown = true;
+ if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
+ return indicatePessimisticFixpoint();
+
+ return Change;
+ }
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AAAMDMaxNumWorkgroups &createForPosition(const IRPosition &IRP,
+ Attributor &A);
+
+ ChangeStatus manifest(Attributor &A) override {
+ Function *F = getAssociatedFunction();
+ // TODO: Skip adding if worst case?
+ LLVMContext &Ctx = F->getContext();
+ SmallString<32> Buffer;
+ raw_svector_ostream OS(Buffer);
+ OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed();
+
+ // TODO: Should annotate loads of the group size for this to do anything
+ // useful.
+ return A.manifestAttrs(
+ getIRPosition(),
+ {Attribute::get(Ctx, "amdgpu-max-num-workgroups", OS.str())},
+ /* ForceReplace= */ true);
+ }
+
+ const std::string getName() const override { return "AAAMDMaxNumWorkgroups"; }
+
+ const std::string getAsStr(Attributor *) const override {
+ std::string Buffer = "AAAMDMaxNumWorkgroupsState[";
+ raw_string_ostream OS(Buffer);
+ OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed()
+ << ']';
+ return OS.str();
+ }
+
+ const char *getIdAddr() const override { return &ID; }
+
+ /// This function should return true if the type of the \p AA is
+ /// AAAMDMaxNumWorkgroups
+ static bool classof(const AbstractAttribute *AA) {
+ return (AA->getIdAddr() == &ID);
+ }
+
+ void trackStatistics() const override {}
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
+
+const char AAAMDMaxNumWorkgroups::ID = 0;
+
+AAAMDMaxNumWorkgroups &
+AAAMDMaxNumWorkgroups::createForPosition(const IRPosition &IRP, Attributor &A) {
+ if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
+ return *new (A.Allocator) AAAMDMaxNumWorkgroups(IRP, A);
+ llvm_unreachable("AAAMDMaxNumWorkgroups is only valid for function position");
+}
+
/// Propagate amdgpu-waves-per-eu attribute.
struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
@@ -1043,8 +1192,8 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
DenseSet<const char *> Allowed(
{&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
&AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
- &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID,
- &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
+ &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
+ &AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
&AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
&AAInstanceInfo::ID});
@@ -1068,6 +1217,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
for (auto *F : Functions) {
A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(*F));
A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(*F));
+ A.getOrCreateAAFor<AAAMDMaxNumWorkgroups>(IRPosition::function(*F));
A.getOrCreateAAFor<AAAMDGPUNoAGPR>(IRPosition::function(*F));
CallingConv::ID CC = F->getCallingConv();
if (!AMDGPU::isEntryFunctionCC(CC)) {
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-max-num-workgroups-propagate.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-max-num-workgroups-propagate.ll
new file mode 100644
index 00000000000000..bd1aa6e6ed4701
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-max-num-workgroups-propagate.ll
@@ -0,0 +1,228 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck %s
+
+; External call to avoid inferring argument attributes. This makes the
+; final attribute groups easier to read
+declare void @dummy()
+
+define void @extern_callee() {
+; CHECK-LABEL: define void @extern_callee(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: call void @dummy()
+; CHECK-NEXT: ret void
+;
+ call void @dummy()
+ ret void
+}
+
+define internal void @callee_1_2_3() {
+; CHECK-LABEL: define internal void @callee_1_2_3(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: call void @dummy()
+; CHECK-NEXT: ret void
+;
+ call void @dummy()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_1_2_3() #0 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_1_2_3(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void @callee_1_2_3()
+; CHECK-NEXT: call void @extern_callee()
+; CHECK-NEXT: call void @dummy()
+; CHECK-NEXT: ret void
+;
+ call void @callee_1_2_3()
+ call void @extern_callee()
+ call void @dummy()
+ ret void
+}
+
+attributes #0 = {"amdgpu-max-num-workgroups"="1,2,3"}
+
+; -> 100,10,99
+define internal void @callee_merge_100_8_32__16_10_99() {
+; CHECK-LABEL: define internal void @callee_merge_100_8_32__16_10_99(
+; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT: call void @dummy()
+; CHECK-NEXT: ret void
+;
+ call void @dummy()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_100_8_32() #1 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_100_8_32(
+; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT: call void @callee_merge_100_8_32__16_10_99()
+; CHECK-NEXT: ret void
+;
+ call void @callee_merge_100_8_32__16_10_99()
+ ret void
+}
+
+attributes #1 = {"amdgpu-max-num-workgroups"="100,8,32"}
+
+define amdgpu_kernel void @kernel_16_10_99() #2 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_16_10_99(
+; CHECK-SAME: ) #[[ATTR4:[0-9]+]] {
+; CHECK-NEXT: call void @callee_merge_100_8_32__16_10_99()
+; CHECK-NEXT: call void @dummy()
+; CHECK-NEXT: ret void
+;
+ call void @callee_merge_100_8_32__16_10_99()
+ call void @dummy()
+ ret void
+}
+
+attributes #2 = {"amdgpu-max-num-workgroups"="16,10,99"}
+
+define internal void @merge_to_worst_case() {
+; CHECK-LABEL: define internal void @merge_to_worst_case(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void @dummy()
+; CHECK-NEXT: ret void
+;
+ call void @dummy()
+ ret void
+}
+
+define internal void @callee_x_worst_case() {
+; CHECK-LABEL: define internal void @callee_x_worst_case(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void @dummy()
+; CHECK-NEXT: ret void
+;
+ call void @dummy()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_x_maximum() #3 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_x_maximum(
+; CHECK-SAME: ) #[[ATTR5:[0-9]+]] {
+; CHECK-NEXT: call void @merge_to_worst_case()
+; CHECK-NEXT: call void @callee_x_worst_case()
+; CHECK-NEXT: call void @dummy()
+; CHECK-NEXT: ret void
+;
+ call void @merge_to_worst_case()
+ call void @callee_x_worst_case()
+ call void @dummy()
+ ret void
+}
+
+attributes #3 = {"amdgpu-max-num-workgroups"="4294967295,1,1"}
+
+define amdgpu_kernel void @kernel_y_maximum() #4 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_y_maximum(
+; CHECK-SAME: ) #[[ATTR6:[0-9]+]] {
+; CHECK-NEXT: call void @merge_to_worst_case()
+; CHECK-NEXT: call void @dummy()
+; CHECK-NEXT: ret void
+;
+ call void @merge_to_worst_case()
+ call void @dummy()
+ ret void
+}
+
+attributes #4 = {"amdgpu-max-num-workgroups"="1,4294967295,1"}
+
+define amdgpu_kernel void @kernel_z_maximum() #5 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_z_maximum(
+; CHECK-SAME: ) #[[ATTR7:[0-9]+]] {
+; CHECK-NEXT: call void @merge_to_worst_case()
+; CHECK-NEXT: call void @dummy()
+; CHECK-NEXT: ret void
+;
+ call void @merge_to_worst_case()
+ call void @dummy()
+ ret void
+}
+
+attributes #5 = {"amdgpu-max-num-workgroups"="1,1,4294967295"}
+
+; Make sure the attribute isn't lost from the callee.
+define internal void @annotated_callee_from_unannotated_kernel() #6 {
+; CHECK-LABEL: define internal void @annotated_callee_from_unannotated_kernel(
+; CHECK-SAME: ) #[[ATTR8:[0-9]+]] {
+; CHECK-NEXT: call void @dummy()
+; CHECK-NEXT: ret void
+;
+ call void @dummy()
+ ret void
+}
+
+attributes #6 = {"amdgpu-max-num-workgroups"="42,99,123"}
+
+define amdgpu_kernel void @unannotated_kernel_calls_annotated_callee() {
+; CHECK-LABEL: define amdgpu_kernel void @unannotated_kernel_calls_annotated_callee(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void @annotated_callee_from_unannotated_kernel()
+; CHECK-NEXT: ret void
+;
+ call void @annotated_callee_from_unannotated_kernel()
+ ret void
+}
+
+
+define internal void @annotated_callee_merge_caller() #7 {
+; CHECK-LABEL: define internal void @annotated_callee_merge_caller(
+; CHECK-SAME: ) #[[ATTR9:[0-9]+]] {
+; CHECK-NEXT: call void @dummy()
+; CHECK-NEXT: ret void
+;
+ call void @dummy()
+ ret void
+}
+
+attributes #7 = {"amdgpu-max-num-workgroups"="512,256,1024"}
+
+define amdgpu_kernel void @call_annotated_callee_merge_caller() #8 {
+; CHECK-LABEL: define amdgpu_kernel void @call_annotated_callee_merge_caller(
+; CHECK-SAME: ) #[[ATTR10:[0-9]+]] {
+; CHECK-NEXT: call void @annotated_callee_merge_caller()
+; CHECK-NEXT: ret void
+;
+ call void @annotated_callee_merge_caller()
+ ret void
+}
+
+attributes #8 = {"amdgpu-max-num-workgroups"="256,128,2048"}
+
+define internal void @called_by_explicit_worst_case() {
+; CHECK-LABEL: define internal void @called_by_explicit_worst_case(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void @dummy()
+; CHECK-NEXT: ret void
+;
+ call void @dummy()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_explicit_worst_case() #9 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_explicit_worst_case(
+; CHECK-SAME: ) #[[ATTR11:[0-9]+]] {
+; CHECK-NEXT: call void @called_by_explicit_worst_case()
+; CHECK-NEXT: ret void
+;
+ call void @called_by_explicit_worst_case()
+ ret void
+}
+
+attributes #9 = {"amdgpu-max-num-workgroups"="4294967295,4294967295,4294967295"}
+
+;.
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-max-num-workgroups"="1,2,3" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-max-num-workgroups"="100,10,99" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR3]] = { "amdgpu-max-num-workgroups"="100,8,32" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4]] = { "amdgpu-max-num-workgroups"="16,10,99" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR5]] = { "amdgpu-max-num-workgroups"="4294967295,1,1" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR6]] = { "amdgpu-max-num-workgroups"="1,4294967295,1" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR7]] = { "amdgpu-max-num-workgroups"="1,1,4294967295" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR8]] = { "amdgpu-max-num-workgroups"="42,99,123" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR9]] = { "amdgpu-max-num-workgroups"="256,128,1024" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR10]] = { "amdgpu-max-num-workgroups"="256,128,2048" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR11]] = { "amdgpu-max-num-workgroups"="4294967295,4294967295,4294967295" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+;.
>From 7fe147558fc9848175c286f7f311c2318f05af76 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 21 Oct 2024 16:59:28 -0700
Subject: [PATCH 2/4] Comment parameter name
---
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 58b90ddc36605c..b10100023f058a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -911,7 +911,9 @@ struct AAAMDMaxNumWorkgroups
};
bool AllCallSitesKnown = true;
- if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
+ if (!A.checkForAllCallSites(CheckCallSite, *this,
+ /*RequireAllCallSites=*/true,
+ AllCallSitesKnown))
return indicatePessimisticFixpoint();
return Change;
>From 8566c391c2392587ea09334c01af8c6e4760177e Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 21 Oct 2024 17:03:32 -0700
Subject: [PATCH 3/4] Add a shader entry test
---
.../AMDGPU/attr-amdgpu-max-num-workgroups-propagate.ll | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-max-num-workgroups-propagate.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-max-num-workgroups-propagate.ll
index bd1aa6e6ed4701..366432e0fc6cb1 100644
--- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-max-num-workgroups-propagate.ll
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-max-num-workgroups-propagate.ll
@@ -62,6 +62,16 @@ define amdgpu_kernel void @kernel_100_8_32() #1 {
ret void
}
+define amdgpu_cs void @amdgpu_cs_100_8_32() #1 {
+; CHECK-LABEL: define amdgpu_cs void @amdgpu_cs_100_8_32(
+; CHECK-SAME: ) #[[ATTR3]] {
+; CHECK-NEXT: call void @callee_merge_100_8_32__16_10_99()
+; CHECK-NEXT: ret void
+;
+ call void @callee_merge_100_8_32__16_10_99()
+ ret void
+}
+
attributes #1 = {"amdgpu-max-num-workgroups"="100,8,32"}
define amdgpu_kernel void @kernel_16_10_99() #2 {
>From b0cdf6fd0a8441ec9ab7af96e821000c7cd19743 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 21 Oct 2024 17:08:05 -0700
Subject: [PATCH 4/4] Check isValidState
---
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index b10100023f058a..151f33ada18862 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -902,7 +902,7 @@ struct AAAMDMaxNumWorkgroups
const auto *CallerInfo = A.getAAFor<AAAMDMaxNumWorkgroups>(
*this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
- if (!CallerInfo)
+ if (!CallerInfo || !CallerInfo->isValidState())
return false;
Change |=
More information about the llvm-branch-commits
mailing list