[llvm] [AMDGPU] Avoid resource propagation for recursion through multiple functions (PR #111004)
Janek van Oirschot via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 11 04:59:43 PDT 2024
https://github.com/JanekvO updated https://github.com/llvm/llvm-project/pull/111004
From ded865a8b1a42de35d99612224825c591766d2c9 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Thu, 3 Oct 2024 16:13:46 +0100
Subject: [PATCH 1/6] [AMDGPU] Avoid metadata propagation for recursion through
multiple functions
---
.../Target/AMDGPU/AMDGPUMCResourceInfo.cpp | 7 +-
.../CodeGen/AMDGPU/agpr-register-count.ll | 2 +-
.../amdpal-metadata-agpr-register-count.ll | 2 +-
.../CodeGen/AMDGPU/function-resource-usage.ll | 73 ++++++++++++++++
.../hsa-metadata-agpr-register-count.ll | 2 +-
llvm/test/CodeGen/AMDGPU/ipra.ll | 4 +-
llvm/test/CodeGen/AMDGPU/recursion.ll | 18 ++--
llvm/test/CodeGen/AMDGPU/recursive-mcexpr.ll | 85 +++++++++++++++++++
llvm/test/CodeGen/AMDGPU/sibling-call.ll | 4 +-
9 files changed, 180 insertions(+), 17 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/recursive-mcexpr.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
index da0397fa20bd1b..dd970c78e66e91 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
@@ -109,10 +109,13 @@ void MCResourceInfo::assignResourceInfoExpr(
for (const Function *Callee : Callees) {
if (!Seen.insert(Callee).second)
continue;
+ if (!F.doesNotRecurse() && !Callee->doesNotRecurse())
+ continue;
MCSymbol *CalleeValSym = getSymbol(Callee->getName(), RIK, OutContext);
ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
}
- SymVal = AMDGPUMCExpr::create(Kind, ArgExprs, OutContext);
+ if (ArgExprs.size() > 1)
+ SymVal = AMDGPUMCExpr::create(Kind, ArgExprs, OutContext);
}
MCSymbol *Sym = getSymbol(MF.getName(), RIK, OutContext);
Sym->setVariableValue(SymVal);
@@ -164,6 +167,8 @@ void MCResourceInfo::gatherResourceInfo(
for (const Function *Callee : FRI.Callees) {
if (!Seen.insert(Callee).second)
continue;
+ if (!MF.getFunction().doesNotRecurse() && !Callee->doesNotRecurse())
+ continue;
if (!Callee->isDeclaration()) {
MCSymbol *calleeValSym =
getSymbol(Callee->getName(), RIK_PrivateSegSize, OutContext);
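Patch 1 gates propagation on the IR norecurse attribute: a callee's resource symbol is only referenced when the caller or the callee is known not to recurse, and the local constant is only replaced by the combined max/or expression when more than one operand remains. A standalone model of that guard (plain structs as stand-ins, not the real llvm::Function/MCSymbol API):

#include <iostream>
#include <string>
#include <vector>

struct Fn {
  std::string Name;
  bool DoesNotRecurse; // models the IR 'norecurse' attribute
  std::vector<const Fn *> Callees;
};

// Mirrors the early-continue added to assignResourceInfoExpr() above.
std::vector<std::string> referencedCallees(const Fn &Caller) {
  std::vector<std::string> Refs;
  for (const Fn *Callee : Caller.Callees) {
    if (!Caller.DoesNotRecurse && !Callee->DoesNotRecurse)
      continue; // possible recursion through this edge: drop the symbol
    Refs.push_back(Callee->Name + ".num_vgpr");
  }
  return Refs;
}

int main() {
  Fn A{"multi_stage_recurse1", false, {}};
  Fn B{"multi_stage_recurse2", false, {&A}};
  A.Callees.push_back(&B);
  // Neither side carries norecurse, so no symbol reference is emitted and
  // the caller falls back to its local constant.
  std::cout << referencedCallees(A).size() << "\n"; // prints 0
}

The cost, visible in the recursion.ll updates below, is that references are also dropped for callers that merely lack the attribute; the later revisions replace this heuristic with an exact cycle check.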
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll b/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
index 0e16ea10c019ac..189b2d80827896 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
@@ -178,4 +178,4 @@ bb:
; GCN-NEXT: .set amdgpu.max_num_agpr, 32
; GCN-NEXT: .set amdgpu.max_num_sgpr, 34
-attributes #0 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+attributes #0 = { nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll b/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll
index 8f4cb364751d88..a01bfc6c8730da 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll
@@ -77,4 +77,4 @@ bb:
; GFX908: agpr_count: 0x20
; GFX908: vgpr_count: 0x20
-attributes #0 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" }
+attributes #0 = { nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,512" }
diff --git a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
index d3a6b4e01ebfb8..3c57689781378f 100644
--- a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
@@ -481,6 +481,79 @@ define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
ret void
}
+; GCN-LABEL: {{^}}multi_stage_recurse2:
+; GCN: .set multi_stage_recurse2.num_vgpr, 41
+; GCN: .set multi_stage_recurse2.num_agpr, 0
+; GCN: .set multi_stage_recurse2.numbered_sgpr, 34
+; GCN: .set multi_stage_recurse2.private_seg_size, 16
+; GCN: .set multi_stage_recurse2.uses_vcc, 1
+; GCN: .set multi_stage_recurse2.uses_flat_scratch, 0
+; GCN: .set multi_stage_recurse2.has_dyn_sized_stack, 0
+; GCN: .set multi_stage_recurse2.has_recursion, 1
+; GCN: .set multi_stage_recurse2.has_indirect_call, 0
+; GCN: TotalNumSgprs: 38
+; GCN: NumVgprs: 41
+; GCN: ScratchSize: 16
+; GCN-LABEL: {{^}}multi_stage_recurse1:
+; GCN: .set multi_stage_recurse1.num_vgpr, 41
+; GCN: .set multi_stage_recurse1.num_agpr, 0
+; GCN: .set multi_stage_recurse1.numbered_sgpr, 34
+; GCN: .set multi_stage_recurse1.private_seg_size, 16
+; GCN: .set multi_stage_recurse1.uses_vcc, 1
+; GCN: .set multi_stage_recurse1.uses_flat_scratch, 0
+; GCN: .set multi_stage_recurse1.has_dyn_sized_stack, 0
+; GCN: .set multi_stage_recurse1.has_recursion, 1
+; GCN: .set multi_stage_recurse1.has_indirect_call, 0
+; GCN: TotalNumSgprs: 38
+; GCN: NumVgprs: 41
+; GCN: ScratchSize: 16
+define void @multi_stage_recurse1(i32 %val) #2 {
+ call void @multi_stage_recurse2(i32 %val)
+ ret void
+}
+define void @multi_stage_recurse2(i32 %val) #2 {
+ call void @multi_stage_recurse1(i32 %val)
+ ret void
+}
+
+; GCN-LABEL: {{^}}usage_multi_stage_recurse:
+; GCN: .set usage_multi_stage_recurse.num_vgpr, max(32, multi_stage_recurse1.num_vgpr)
+; GCN: .set usage_multi_stage_recurse.num_agpr, max(0, multi_stage_recurse1.num_agpr)
+; GCN: .set usage_multi_stage_recurse.numbered_sgpr, max(33, multi_stage_recurse1.numbered_sgpr)
+; GCN: .set usage_multi_stage_recurse.private_seg_size, 0+(max(multi_stage_recurse1.private_seg_size))
+; GCN: .set usage_multi_stage_recurse.uses_vcc, or(1, multi_stage_recurse1.uses_vcc)
+; GCN: .set usage_multi_stage_recurse.uses_flat_scratch, or(1, multi_stage_recurse1.uses_flat_scratch)
+; GCN: .set usage_multi_stage_recurse.has_dyn_sized_stack, or(0, multi_stage_recurse1.has_dyn_sized_stack)
+; GCN: .set usage_multi_stage_recurse.has_recursion, or(1, multi_stage_recurse1.has_recursion)
+; GCN: .set usage_multi_stage_recurse.has_indirect_call, or(0, multi_stage_recurse1.has_indirect_call)
+; GCN: TotalNumSgprs: 40
+; GCN: NumVgprs: 41
+; GCN: ScratchSize: 16
+define amdgpu_kernel void @usage_multi_stage_recurse(i32 %n) #0 {
+ call void @multi_stage_recurse1(i32 %n)
+ ret void
+}
+
+; GCN-LABEL: {{^}}multi_call_with_multi_stage_recurse:
+; GCN: .set multi_call_with_multi_stage_recurse.num_vgpr, max(41, use_stack0.num_vgpr, use_stack1.num_vgpr, multi_stage_recurse1.num_vgpr)
+; GCN: .set multi_call_with_multi_stage_recurse.num_agpr, max(0, use_stack0.num_agpr, use_stack1.num_agpr, multi_stage_recurse1.num_agpr)
+; GCN: .set multi_call_with_multi_stage_recurse.numbered_sgpr, max(43, use_stack0.numbered_sgpr, use_stack1.numbered_sgpr, multi_stage_recurse1.numbered_sgpr)
+; GCN: .set multi_call_with_multi_stage_recurse.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size, multi_stage_recurse1.private_seg_size))
+; GCN: .set multi_call_with_multi_stage_recurse.uses_vcc, or(1, use_stack0.uses_vcc, use_stack1.uses_vcc, multi_stage_recurse1.uses_vcc)
+; GCN: .set multi_call_with_multi_stage_recurse.uses_flat_scratch, or(1, use_stack0.uses_flat_scratch, use_stack1.uses_flat_scratch, multi_stage_recurse1.uses_flat_scratch)
+; GCN: .set multi_call_with_multi_stage_recurse.has_dyn_sized_stack, or(0, use_stack0.has_dyn_sized_stack, use_stack1.has_dyn_sized_stack, multi_stage_recurse1.has_dyn_sized_stack)
+; GCN: .set multi_call_with_multi_stage_recurse.has_recursion, or(1, use_stack0.has_recursion, use_stack1.has_recursion, multi_stage_recurse1.has_recursion)
+; GCN: .set multi_call_with_multi_stage_recurse.has_indirect_call, or(0, use_stack0.has_indirect_call, use_stack1.has_indirect_call, multi_stage_recurse1.has_indirect_call)
+; GCN: TotalNumSgprs: 49
+; GCN: NumVgprs: 41
+; GCN: ScratchSize: 2052
+define amdgpu_kernel void @multi_call_with_multi_stage_recurse(i32 %n) #0 {
+ call void @use_stack0()
+ call void @use_stack1()
+ call void @multi_stage_recurse1(i32 %n)
+ ret void
+}
+
; Make sure there's no assert when a sgpr96 is used.
; GCN-LABEL: {{^}}count_use_sgpr96_external_call
; GCN: .set count_use_sgpr96_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr)
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-agpr-register-count.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-agpr-register-count.ll
index 380a8e911e4995..fdc3f3957b828b 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-agpr-register-count.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-agpr-register-count.ll
@@ -98,4 +98,4 @@ bb:
ret void
}
-attributes #0 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+attributes #0 = { nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
diff --git a/llvm/test/CodeGen/AMDGPU/ipra.ll b/llvm/test/CodeGen/AMDGPU/ipra.ll
index 957f404c8cdbed..85d23323ec6dec 100644
--- a/llvm/test/CodeGen/AMDGPU/ipra.ll
+++ b/llvm/test/CodeGen/AMDGPU/ipra.ll
@@ -131,6 +131,6 @@ bb:
declare dso_local void @eggs()
-attributes #0 = { nounwind }
-attributes #1 = { nounwind noinline "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+attributes #0 = { nounwind norecurse }
+attributes #1 = { nounwind noinline norecurse "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
attributes #2 = { norecurse }
diff --git a/llvm/test/CodeGen/AMDGPU/recursion.ll b/llvm/test/CodeGen/AMDGPU/recursion.ll
index c0d228e1254e64..479f46faf79d3e 100644
--- a/llvm/test/CodeGen/AMDGPU/recursion.ll
+++ b/llvm/test/CodeGen/AMDGPU/recursion.ll
@@ -41,11 +41,11 @@ define void @tail_recursive_with_stack() {
; For an arbitrary recursive call, report a large number for unknown stack
; usage for code object v4 and older
; CHECK-LABEL: {{^}}calls_recursive:
-; CHECK: .set calls_recursive.private_seg_size, 0+(max(16384, recursive.private_seg_size))
+; CHECK: .set calls_recursive.private_seg_size, 0+(max(16384))
;
; V5-LABEL: {{^}}calls_recursive:
-; V5: .set calls_recursive.private_seg_size, 0+(max(recursive.private_seg_size))
-; V5: .set calls_recursive.has_dyn_sized_stack, or(0, recursive.has_dyn_sized_stack)
+; V5: .set calls_recursive.private_seg_size, 0
+; V5: .set calls_recursive.has_dyn_sized_stack, 0
define amdgpu_kernel void @calls_recursive() {
call void @recursive()
ret void
@@ -65,22 +65,22 @@ define amdgpu_kernel void @kernel_indirectly_calls_tail_recursive() {
; in the kernel.
; CHECK-LABEL: {{^}}kernel_calls_tail_recursive:
-; CHECK: .set kernel_calls_tail_recursive.private_seg_size, 0+(max(16384, tail_recursive.private_seg_size))
+; CHECK: .set kernel_calls_tail_recursive.private_seg_size, 0+(max(16384))
;
; V5-LABEL: {{^}}kernel_calls_tail_recursive:
-; V5: .set kernel_calls_tail_recursive.private_seg_size, 0+(max(tail_recursive.private_seg_size))
-; V5: .set kernel_calls_tail_recursive.has_recursion, or(1, tail_recursive.has_recursion)
+; V5: .set kernel_calls_tail_recursive.private_seg_size, 0
+; V5: .set kernel_calls_tail_recursive.has_recursion, 1
define amdgpu_kernel void @kernel_calls_tail_recursive() {
call void @tail_recursive()
ret void
}
; CHECK-LABEL: {{^}}kernel_calls_tail_recursive_with_stack:
-; CHECK: .set kernel_calls_tail_recursive_with_stack.private_seg_size, 0+(max(16384, tail_recursive_with_stack.private_seg_size))
+; CHECK: .set kernel_calls_tail_recursive_with_stack.private_seg_size, 0+(max(16384))
;
; V5-LABEL: {{^}}kernel_calls_tail_recursive_with_stack:
-; V5: .set kernel_calls_tail_recursive_with_stack.private_seg_size, 0+(max(tail_recursive_with_stack.private_seg_size))
-; V5: .set kernel_calls_tail_recursive_with_stack.has_dyn_sized_stack, or(0, tail_recursive_with_stack.has_dyn_sized_stack)
+; V5: .set kernel_calls_tail_recursive_with_stack.private_seg_size, 0
+; V5: .set kernel_calls_tail_recursive_with_stack.has_dyn_sized_stack, 0
define amdgpu_kernel void @kernel_calls_tail_recursive_with_stack() {
call void @tail_recursive_with_stack()
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/recursive-mcexpr.ll b/llvm/test/CodeGen/AMDGPU/recursive-mcexpr.ll
new file mode 100644
index 00000000000000..630a0923e31287
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/recursive-mcexpr.ll
@@ -0,0 +1,85 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -mcpu=gfx90a < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}qux
+; CHECK: .set qux.num_vgpr, 41
+; CHECK: .set qux.num_agpr, 0
+; CHECK: .set qux.numbered_sgpr, 34
+; CHECK: .set qux.private_seg_size, 16
+; CHECK: .set qux.uses_vcc, 1
+; CHECK: .set qux.uses_flat_scratch, 0
+; CHECK: .set qux.has_dyn_sized_stack, 0
+; CHECK: .set qux.has_recursion, 1
+; CHECK: .set qux.has_indirect_call, 0
+
+; CHECK-LABEL: {{^}}baz
+; CHECK: .set baz.num_vgpr, 42
+; CHECK: .set baz.num_agpr, 0
+; CHECK: .set baz.numbered_sgpr, 34
+; CHECK: .set baz.private_seg_size, 16
+; CHECK: .set baz.uses_vcc, 1
+; CHECK: .set baz.uses_flat_scratch, 0
+; CHECK: .set baz.has_dyn_sized_stack, 0
+; CHECK: .set baz.has_recursion, 1
+; CHECK: .set baz.has_indirect_call, 0
+
+; CHECK-LABEL: {{^}}bar
+; CHECK: .set bar.num_vgpr, 42
+; CHECK: .set bar.num_agpr, 0
+; CHECK: .set bar.numbered_sgpr, 34
+; CHECK: .set bar.private_seg_size, 16
+; CHECK: .set bar.uses_vcc, 1
+; CHECK: .set bar.uses_flat_scratch, 0
+; CHECK: .set bar.has_dyn_sized_stack, 0
+; CHECK: .set bar.has_recursion, 1
+; CHECK: .set bar.has_indirect_call, 0
+
+; CHECK-LABEL: {{^}}foo
+; CHECK: .set foo.num_vgpr, 42
+; CHECK: .set foo.num_agpr, 0
+; CHECK: .set foo.numbered_sgpr, 34
+; CHECK: .set foo.private_seg_size, 16
+; CHECK: .set foo.uses_vcc, 1
+; CHECK: .set foo.uses_flat_scratch, 0
+; CHECK: .set foo.has_dyn_sized_stack, 0
+; CHECK: .set foo.has_recursion, 1
+; CHECK: .set foo.has_indirect_call, 0
+
+define void @foo() {
+entry:
+ call void @bar()
+ ret void
+}
+
+define void @bar() {
+entry:
+ call void @baz()
+ ret void
+}
+
+define void @baz() {
+entry:
+ call void @qux()
+ ret void
+}
+
+define void @qux() {
+entry:
+ call void @foo()
+ ret void
+}
+
+; CHECK-LABEL: {{^}}usefoo
+; CHECK: .set usefoo.num_vgpr, 32
+; CHECK: .set usefoo.num_agpr, 0
+; CHECK: .set usefoo.numbered_sgpr, 33
+; CHECK: .set usefoo.private_seg_size, 0
+; CHECK: .set usefoo.uses_vcc, 1
+; CHECK: .set usefoo.uses_flat_scratch, 1
+; CHECK: .set usefoo.has_dyn_sized_stack, 0
+; CHECK: .set usefoo.has_recursion, 1
+; CHECK: .set usefoo.has_indirect_call, 0
+define amdgpu_kernel void @usefoo() {
+ call void @foo()
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
index 5536a09538e6ee..4639bf4a678d49 100644
--- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
@@ -468,5 +468,5 @@ entry:
ret <2 x i64> %ret
}
-attributes #0 = { nounwind }
-attributes #1 = { nounwind noinline "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+attributes #0 = { nounwind norecurse }
+attributes #1 = { nounwind noinline norecurse "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
From e217863fc4c2ae16dffd1a4c365f5fa687fb8758 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Fri, 4 Oct 2024 17:05:30 +0100
Subject: [PATCH 2/6] Different approach, now walks over the sub-symbols and
searches for uses of the symbol we're trying to define
---
.../Target/AMDGPU/AMDGPUMCResourceInfo.cpp | 81 ++++++++++++++++--
.../CodeGen/AMDGPU/agpr-register-count.ll | 2 +-
.../amdpal-metadata-agpr-register-count.ll | 2 +-
.../CodeGen/AMDGPU/function-resource-usage.ll | 77 ++++++++++++++---
.../hsa-metadata-agpr-register-count.ll | 2 +-
llvm/test/CodeGen/AMDGPU/ipra.ll | 4 +-
llvm/test/CodeGen/AMDGPU/recursion.ll | 18 ++--
llvm/test/CodeGen/AMDGPU/recursive-mcexpr.ll | 85 -------------------
.../AMDGPU/recursive-resource-usage-mcexpr.ll | 85 +++++++++++++++++++
llvm/test/CodeGen/AMDGPU/sibling-call.ll | 4 +-
10 files changed, 238 insertions(+), 122 deletions(-)
delete mode 100644 llvm/test/CodeGen/AMDGPU/recursive-mcexpr.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/recursive-resource-usage-mcexpr.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
index dd970c78e66e91..4e88f3f36a3104 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
@@ -91,6 +91,58 @@ MCSymbol *MCResourceInfo::getMaxSGPRSymbol(MCContext &OutContext) {
return OutContext.getOrCreateSymbol("amdgpu.max_num_sgpr");
}
+static bool findSymbolInExpr(MCSymbol *Sym, const MCExpr *Expr,
+ SmallVectorImpl<const MCExpr *> &Exprs) {
+ switch (Expr->getKind()) {
+ default:
+ return false;
+ case MCExpr::ExprKind::SymbolRef: {
+ const MCSymbolRefExpr *SymRefExpr = cast<MCSymbolRefExpr>(Expr);
+ const MCSymbol &SymRef = SymRefExpr->getSymbol();
+ if (Sym == &SymRef)
+ return true;
+ if (SymRef.isVariable())
+ Exprs.push_back(SymRef.getVariableValue(/*isUsed=*/false));
+ return false;
+ }
+ case MCExpr::ExprKind::Binary: {
+ const MCBinaryExpr *BExpr = cast<MCBinaryExpr>(Expr);
+ return findSymbolInExpr(Sym, BExpr->getLHS(), Exprs) ||
+ findSymbolInExpr(Sym, BExpr->getRHS(), Exprs);
+ }
+ case MCExpr::ExprKind::Unary: {
+ const MCUnaryExpr *UExpr = cast<MCUnaryExpr>(Expr);
+ return findSymbolInExpr(Sym, UExpr->getSubExpr(), Exprs);
+ }
+ case MCExpr::ExprKind::Target: {
+ const AMDGPUMCExpr *AGVK = cast<AMDGPUMCExpr>(Expr);
+ for (const MCExpr *E : AGVK->getArgs()) {
+ if (findSymbolInExpr(Sym, E, Exprs))
+ return true;
+ }
+ return false;
+ }
+ }
+}
+
+// Symbols whose values eventually are used through their defines (i.e.,
+// recursive) must be avoided. Do a walk over Expr to see if Sym will occur in
+// it. The Expr is an MCExpr given through a callee's equivalent MCSymbol so if
+// no recursion is found Sym can be safely assigned to a (sub-)expr which
+// contains the symbol Expr is associated with.
+static bool foundRecursiveSymbolDef(MCSymbol *Sym, const MCExpr *Expr) {
+ SmallVector<const MCExpr *, 8> WorkList;
+ WorkList.push_back(Expr);
+
+ while (!WorkList.empty()) {
+ const MCExpr *CurExpr = WorkList.pop_back_val();
+ if (findSymbolInExpr(Sym, CurExpr, WorkList))
+ return true;
+ }
+
+ return false;
+}
+
void MCResourceInfo::assignResourceInfoExpr(
int64_t LocalValue, ResourceInfoKind RIK, AMDGPUMCExpr::VariantKind Kind,
const MachineFunction &MF, const SmallVectorImpl<const Function *> &Callees,
@@ -98,6 +150,7 @@ void MCResourceInfo::assignResourceInfoExpr(
const MCConstantExpr *LocalConstExpr =
MCConstantExpr::create(LocalValue, OutContext);
const MCExpr *SymVal = LocalConstExpr;
+ MCSymbol *Sym = getSymbol(MF.getName(), RIK, OutContext);
if (!Callees.empty()) {
SmallVector<const MCExpr *, 8> ArgExprs;
// Avoid recursive symbol assignment.
@@ -109,15 +162,19 @@ void MCResourceInfo::assignResourceInfoExpr(
for (const Function *Callee : Callees) {
if (!Seen.insert(Callee).second)
continue;
- if (!F.doesNotRecurse() && !Callee->doesNotRecurse())
- continue;
MCSymbol *CalleeValSym = getSymbol(Callee->getName(), RIK, OutContext);
- ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
+ if (CalleeValSym->isVariable()) {
+ if (!foundRecursiveSymbolDef(
+ Sym, CalleeValSym->getVariableValue(/*isUsed=*/false))) {
+ ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
+ }
+ } else {
+ ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
+ }
}
if (ArgExprs.size() > 1)
SymVal = AMDGPUMCExpr::create(Kind, ArgExprs, OutContext);
}
- MCSymbol *Sym = getSymbol(MF.getName(), RIK, OutContext);
Sym->setVariableValue(SymVal);
}
@@ -158,6 +215,7 @@ void MCResourceInfo::gatherResourceInfo(
// The expression for private segment size should be: FRI.PrivateSegmentSize
// + max(FRI.Callees, FRI.CalleeSegmentSize)
SmallVector<const MCExpr *, 8> ArgExprs;
+ MCSymbol *Sym = getSymbol(MF.getName(), RIK_PrivateSegSize, OutContext);
if (FRI.CalleeSegmentSize)
ArgExprs.push_back(
MCConstantExpr::create(FRI.CalleeSegmentSize, OutContext));
@@ -167,12 +225,18 @@ void MCResourceInfo::gatherResourceInfo(
for (const Function *Callee : FRI.Callees) {
if (!Seen.insert(Callee).second)
continue;
- if (!MF.getFunction().doesNotRecurse() && !Callee->doesNotRecurse())
- continue;
if (!Callee->isDeclaration()) {
MCSymbol *calleeValSym =
getSymbol(Callee->getName(), RIK_PrivateSegSize, OutContext);
- ArgExprs.push_back(MCSymbolRefExpr::create(calleeValSym, OutContext));
+ if (calleeValSym->isVariable()) {
+ if (!foundRecursiveSymbolDef(
+ Sym, calleeValSym->getVariableValue(/*isUsed=*/false))) {
+ ArgExprs.push_back(
+ MCSymbolRefExpr::create(calleeValSym, OutContext));
+ }
+ } else {
+ ArgExprs.push_back(MCSymbolRefExpr::create(calleeValSym, OutContext));
+ }
}
}
const MCExpr *localConstExpr =
@@ -183,8 +247,7 @@ void MCResourceInfo::gatherResourceInfo(
localConstExpr =
MCBinaryExpr::createAdd(localConstExpr, transitiveExpr, OutContext);
}
- getSymbol(MF.getName(), RIK_PrivateSegSize, OutContext)
- ->setVariableValue(localConstExpr);
+ Sym->setVariableValue(localConstExpr);
}
auto SetToLocal = [&](int64_t LocalValue, ResourceInfoKind RIK) {
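Patch 2 drops the attribute heuristic and checks the MC layer directly: before a callee's symbol is folded in, foundRecursiveSymbolDef() walks the callee's value expression and rejects the reference if the symbol currently being defined already occurs in it. A self-contained model of that walk (plain structs as stand-ins for MCSymbol/MCExpr, not the real API):

#include <iostream>
#include <string>
#include <vector>

struct Expr;
struct Symbol {
  std::string Name;
  const Expr *Value = nullptr; // set once the symbol becomes a variable
};

struct Expr {
  const Symbol *Ref = nullptr;   // leaf: a symbol reference
  std::vector<const Expr *> Ops; // interior: operands of max()/or()/binary ops
};

// Worklist walk over the expression DAG; definitions of variable symbols
// are chased by pushing their values, as in findSymbolInExpr().
bool foundRecursiveSymbolDef(const Symbol *Sym, const Expr *Root) {
  std::vector<const Expr *> WorkList{Root};
  while (!WorkList.empty()) {
    const Expr *E = WorkList.back();
    WorkList.pop_back();
    if (E->Ref) {
      if (E->Ref == Sym)
        return true; // Sym occurs in its own would-be definition
      if (E->Ref->Value)
        WorkList.push_back(E->Ref->Value);
      continue;
    }
    for (const Expr *Op : E->Ops)
      WorkList.push_back(Op);
  }
  return false;
}

int main() {
  // bar.num_vgpr is already defined in terms of foo.num_vgpr; defining
  // foo.num_vgpr in terms of bar.num_vgpr would close the cycle.
  Symbol Foo{"foo.num_vgpr"}, Bar{"bar.num_vgpr"};
  Expr RefFoo{&Foo}, RefBar{&Bar};
  Expr BarVal{nullptr, {&RefFoo}};
  Bar.Value = &BarVal;
  Expr FooCandidate{nullptr, {&RefBar}};
  std::cout << foundRecursiveSymbolDef(&Foo, &FooCandidate) << "\n"; // 1
}

Unlike the attribute-based guard, this drops only the one edge that would actually close a cycle, so non-recursive callers keep full propagation.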
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll b/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
index 189b2d80827896..0e16ea10c019ac 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
@@ -178,4 +178,4 @@ bb:
; GCN-NEXT: .set amdgpu.max_num_agpr, 32
; GCN-NEXT: .set amdgpu.max_num_sgpr, 34
-attributes #0 = { nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+attributes #0 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll b/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll
index a01bfc6c8730da..8f4cb364751d88 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll
@@ -77,4 +77,4 @@ bb:
; GFX908: agpr_count: 0x20
; GFX908: vgpr_count: 0x20
-attributes #0 = { nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,512" }
+attributes #0 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" }
diff --git a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
index 3c57689781378f..c8cf7d7e535b33 100644
--- a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
@@ -482,18 +482,18 @@ define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
}
; GCN-LABEL: {{^}}multi_stage_recurse2:
-; GCN: .set multi_stage_recurse2.num_vgpr, 41
-; GCN: .set multi_stage_recurse2.num_agpr, 0
-; GCN: .set multi_stage_recurse2.numbered_sgpr, 34
-; GCN: .set multi_stage_recurse2.private_seg_size, 16
-; GCN: .set multi_stage_recurse2.uses_vcc, 1
-; GCN: .set multi_stage_recurse2.uses_flat_scratch, 0
-; GCN: .set multi_stage_recurse2.has_dyn_sized_stack, 0
-; GCN: .set multi_stage_recurse2.has_recursion, 1
-; GCN: .set multi_stage_recurse2.has_indirect_call, 0
-; GCN: TotalNumSgprs: 38
-; GCN: NumVgprs: 41
-; GCN: ScratchSize: 16
+; GCN: .set multi_stage_recurse2.num_vgpr, max(41, multi_stage_recurse1.num_vgpr)
+; GCN: .set multi_stage_recurse2.num_agpr, max(0, multi_stage_recurse1.num_agpr)
+; GCN: .set multi_stage_recurse2.numbered_sgpr, max(34, multi_stage_recurse1.numbered_sgpr)
+; GCN: .set multi_stage_recurse2.private_seg_size, 16+(max(multi_stage_recurse1.private_seg_size))
+; GCN: .set multi_stage_recurse2.uses_vcc, or(1, multi_stage_recurse1.uses_vcc)
+; GCN: .set multi_stage_recurse2.uses_flat_scratch, or(0, multi_stage_recurse1.uses_flat_scratch)
+; GCN: .set multi_stage_recurse2.has_dyn_sized_stack, or(0, multi_stage_recurse1.has_dyn_sized_stack)
+; GCN: .set multi_stage_recurse2.has_recursion, or(1, multi_stage_recurse1.has_recursion)
+; GCN: .set multi_stage_recurse2.has_indirect_call, or(0, multi_stage_recurse1.has_indirect_call)
+; GCN: TotalNumSgprs: multi_stage_recurse2.numbered_sgpr+(extrasgprs(multi_stage_recurse2.uses_vcc, multi_stage_recurse2.uses_flat_scratch, 1))
+; GCN: NumVgprs: max(41, multi_stage_recurse1.num_vgpr)
+; GCN: ScratchSize: 16+(max(multi_stage_recurse1.private_seg_size))
; GCN-LABEL: {{^}}multi_stage_recurse1:
; GCN: .set multi_stage_recurse1.num_vgpr, 41
; GCN: .set multi_stage_recurse1.num_agpr, 0
@@ -534,6 +534,59 @@ define amdgpu_kernel void @usage_multi_stage_recurse(i32 %n) #0 {
ret void
}
+; GCN-LABEL: {{^}}multi_stage_recurse_noattr2:
+; GCN: .set multi_stage_recurse_noattr2.num_vgpr, max(41, multi_stage_recurse_noattr1.num_vgpr)
+; GCN: .set multi_stage_recurse_noattr2.num_agpr, max(0, multi_stage_recurse_noattr1.num_agpr)
+; GCN: .set multi_stage_recurse_noattr2.numbered_sgpr, max(34, multi_stage_recurse_noattr1.numbered_sgpr)
+; GCN: .set multi_stage_recurse_noattr2.private_seg_size, 16+(max(multi_stage_recurse_noattr1.private_seg_size))
+; GCN: .set multi_stage_recurse_noattr2.uses_vcc, or(1, multi_stage_recurse_noattr1.uses_vcc)
+; GCN: .set multi_stage_recurse_noattr2.uses_flat_scratch, or(0, multi_stage_recurse_noattr1.uses_flat_scratch)
+; GCN: .set multi_stage_recurse_noattr2.has_dyn_sized_stack, or(0, multi_stage_recurse_noattr1.has_dyn_sized_stack)
+; GCN: .set multi_stage_recurse_noattr2.has_recursion, or(0, multi_stage_recurse_noattr1.has_recursion)
+; GCN: .set multi_stage_recurse_noattr2.has_indirect_call, or(0, multi_stage_recurse_noattr1.has_indirect_call)
+; GCN: TotalNumSgprs: multi_stage_recurse_noattr2.numbered_sgpr+(extrasgprs(multi_stage_recurse_noattr2.uses_vcc, multi_stage_recurse_noattr2.uses_flat_scratch, 1))
+; GCN: NumVgprs: max(41, multi_stage_recurse_noattr1.num_vgpr)
+; GCN: ScratchSize: 16+(max(multi_stage_recurse_noattr1.private_seg_size))
+; GCN-LABEL: {{^}}multi_stage_recurse_noattr1:
+; GCN: .set multi_stage_recurse_noattr1.num_vgpr, 41
+; GCN: .set multi_stage_recurse_noattr1.num_agpr, 0
+; GCN: .set multi_stage_recurse_noattr1.numbered_sgpr, 34
+; GCN: .set multi_stage_recurse_noattr1.private_seg_size, 16
+; GCN: .set multi_stage_recurse_noattr1.uses_vcc, 1
+; GCN: .set multi_stage_recurse_noattr1.uses_flat_scratch, 0
+; GCN: .set multi_stage_recurse_noattr1.has_dyn_sized_stack, 0
+; GCN: .set multi_stage_recurse_noattr1.has_recursion, 0
+; GCN: .set multi_stage_recurse_noattr1.has_indirect_call, 0
+; GCN: TotalNumSgprs: 38
+; GCN: NumVgprs: 41
+; GCN: ScratchSize: 16
+define void @multi_stage_recurse_noattr1(i32 %val) #0 {
+ call void @multi_stage_recurse_noattr2(i32 %val)
+ ret void
+}
+define void @multi_stage_recurse_noattr2(i32 %val) #0 {
+ call void @multi_stage_recurse_noattr1(i32 %val)
+ ret void
+}
+
+; GCN-LABEL: {{^}}usage_multi_stage_recurse_noattrs:
+; GCN: .set usage_multi_stage_recurse_noattrs.num_vgpr, max(32, multi_stage_recurse_noattr1.num_vgpr)
+; GCN: .set usage_multi_stage_recurse_noattrs.num_agpr, max(0, multi_stage_recurse_noattr1.num_agpr)
+; GCN: .set usage_multi_stage_recurse_noattrs.numbered_sgpr, max(33, multi_stage_recurse_noattr1.numbered_sgpr)
+; GCN: .set usage_multi_stage_recurse_noattrs.private_seg_size, 0+(max(multi_stage_recurse_noattr1.private_seg_size))
+; GCN: .set usage_multi_stage_recurse_noattrs.uses_vcc, or(1, multi_stage_recurse_noattr1.uses_vcc)
+; GCN: .set usage_multi_stage_recurse_noattrs.uses_flat_scratch, or(1, multi_stage_recurse_noattr1.uses_flat_scratch)
+; GCN: .set usage_multi_stage_recurse_noattrs.has_dyn_sized_stack, or(0, multi_stage_recurse_noattr1.has_dyn_sized_stack)
+; GCN: .set usage_multi_stage_recurse_noattrs.has_recursion, or(0, multi_stage_recurse_noattr1.has_recursion)
+; GCN: .set usage_multi_stage_recurse_noattrs.has_indirect_call, or(0, multi_stage_recurse_noattr1.has_indirect_call)
+; GCN: TotalNumSgprs: 40
+; GCN: NumVgprs: 41
+; GCN: ScratchSize: 16
+define amdgpu_kernel void @usage_multi_stage_recurse_noattrs(i32 %n) #0 {
+ call void @multi_stage_recurse_noattr1(i32 %n)
+ ret void
+}
+
; GCN-LABEL: {{^}}multi_call_with_multi_stage_recurse:
; GCN: .set multi_call_with_multi_stage_recurse.num_vgpr, max(41, use_stack0.num_vgpr, use_stack1.num_vgpr, multi_stage_recurse1.num_vgpr)
; GCN: .set multi_call_with_multi_stage_recurse.num_agpr, max(0, use_stack0.num_agpr, use_stack1.num_agpr, multi_stage_recurse1.num_agpr)
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-agpr-register-count.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-agpr-register-count.ll
index fdc3f3957b828b..380a8e911e4995 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-agpr-register-count.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-agpr-register-count.ll
@@ -98,4 +98,4 @@ bb:
ret void
}
-attributes #0 = { nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+attributes #0 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
diff --git a/llvm/test/CodeGen/AMDGPU/ipra.ll b/llvm/test/CodeGen/AMDGPU/ipra.ll
index 85d23323ec6dec..957f404c8cdbed 100644
--- a/llvm/test/CodeGen/AMDGPU/ipra.ll
+++ b/llvm/test/CodeGen/AMDGPU/ipra.ll
@@ -131,6 +131,6 @@ bb:
declare dso_local void @eggs()
-attributes #0 = { nounwind norecurse }
-attributes #1 = { nounwind noinline norecurse "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+attributes #0 = { nounwind }
+attributes #1 = { nounwind noinline "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
attributes #2 = { norecurse }
diff --git a/llvm/test/CodeGen/AMDGPU/recursion.ll b/llvm/test/CodeGen/AMDGPU/recursion.ll
index 479f46faf79d3e..c0d228e1254e64 100644
--- a/llvm/test/CodeGen/AMDGPU/recursion.ll
+++ b/llvm/test/CodeGen/AMDGPU/recursion.ll
@@ -41,11 +41,11 @@ define void @tail_recursive_with_stack() {
; For an arbitrary recursive call, report a large number for unknown stack
; usage for code object v4 and older
; CHECK-LABEL: {{^}}calls_recursive:
-; CHECK: .set calls_recursive.private_seg_size, 0+(max(16384))
+; CHECK: .set calls_recursive.private_seg_size, 0+(max(16384, recursive.private_seg_size))
;
; V5-LABEL: {{^}}calls_recursive:
-; V5: .set calls_recursive.private_seg_size, 0
-; V5: .set calls_recursive.has_dyn_sized_stack, 0
+; V5: .set calls_recursive.private_seg_size, 0+(max(recursive.private_seg_size))
+; V5: .set calls_recursive.has_dyn_sized_stack, or(0, recursive.has_dyn_sized_stack)
define amdgpu_kernel void @calls_recursive() {
call void @recursive()
ret void
@@ -65,22 +65,22 @@ define amdgpu_kernel void @kernel_indirectly_calls_tail_recursive() {
; in the kernel.
; CHECK-LABEL: {{^}}kernel_calls_tail_recursive:
-; CHECK: .set kernel_calls_tail_recursive.private_seg_size, 0+(max(16384))
+; CHECK: .set kernel_calls_tail_recursive.private_seg_size, 0+(max(16384, tail_recursive.private_seg_size))
;
; V5-LABEL: {{^}}kernel_calls_tail_recursive:
-; V5: .set kernel_calls_tail_recursive.private_seg_size, 0
-; V5: .set kernel_calls_tail_recursive.has_recursion, 1
+; V5: .set kernel_calls_tail_recursive.private_seg_size, 0+(max(tail_recursive.private_seg_size))
+; V5: .set kernel_calls_tail_recursive.has_recursion, or(1, tail_recursive.has_recursion)
define amdgpu_kernel void @kernel_calls_tail_recursive() {
call void @tail_recursive()
ret void
}
; CHECK-LABEL: {{^}}kernel_calls_tail_recursive_with_stack:
-; CHECK: .set kernel_calls_tail_recursive_with_stack.private_seg_size, 0+(max(16384))
+; CHECK: .set kernel_calls_tail_recursive_with_stack.private_seg_size, 0+(max(16384, tail_recursive_with_stack.private_seg_size))
;
; V5-LABEL: {{^}}kernel_calls_tail_recursive_with_stack:
-; V5: .set kernel_calls_tail_recursive_with_stack.private_seg_size, 0
-; V5: .set kernel_calls_tail_recursive_with_stack.has_dyn_sized_stack, 0
+; V5: .set kernel_calls_tail_recursive_with_stack.private_seg_size, 0+(max(tail_recursive_with_stack.private_seg_size))
+; V5: .set kernel_calls_tail_recursive_with_stack.has_dyn_sized_stack, or(0, tail_recursive_with_stack.has_dyn_sized_stack)
define amdgpu_kernel void @kernel_calls_tail_recursive_with_stack() {
call void @tail_recursive_with_stack()
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/recursive-mcexpr.ll b/llvm/test/CodeGen/AMDGPU/recursive-mcexpr.ll
deleted file mode 100644
index 630a0923e31287..00000000000000
--- a/llvm/test/CodeGen/AMDGPU/recursive-mcexpr.ll
+++ /dev/null
@@ -1,85 +0,0 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -mcpu=gfx90a < %s | FileCheck %s
-
-; CHECK-LABEL: {{^}}qux
-; CHECK: .set qux.num_vgpr, 41
-; CHECK: .set qux.num_agpr, 0
-; CHECK: .set qux.numbered_sgpr, 34
-; CHECK: .set qux.private_seg_size, 16
-; CHECK: .set qux.uses_vcc, 1
-; CHECK: .set qux.uses_flat_scratch, 0
-; CHECK: .set qux.has_dyn_sized_stack, 0
-; CHECK: .set qux.has_recursion, 1
-; CHECK: .set qux.has_indirect_call, 0
-
-; CHECK-LABEL: {{^}}baz
-; CHECK: .set baz.num_vgpr, 42
-; CHECK: .set baz.num_agpr, 0
-; CHECK: .set baz.numbered_sgpr, 34
-; CHECK: .set baz.private_seg_size, 16
-; CHECK: .set baz.uses_vcc, 1
-; CHECK: .set baz.uses_flat_scratch, 0
-; CHECK: .set baz.has_dyn_sized_stack, 0
-; CHECK: .set baz.has_recursion, 1
-; CHECK: .set baz.has_indirect_call, 0
-
-; CHECK-LABEL: {{^}}bar
-; CHECK: .set bar.num_vgpr, 42
-; CHECK: .set bar.num_agpr, 0
-; CHECK: .set bar.numbered_sgpr, 34
-; CHECK: .set bar.private_seg_size, 16
-; CHECK: .set bar.uses_vcc, 1
-; CHECK: .set bar.uses_flat_scratch, 0
-; CHECK: .set bar.has_dyn_sized_stack, 0
-; CHECK: .set bar.has_recursion, 1
-; CHECK: .set bar.has_indirect_call, 0
-
-; CHECK-LABEL: {{^}}foo
-; CHECK: .set foo.num_vgpr, 42
-; CHECK: .set foo.num_agpr, 0
-; CHECK: .set foo.numbered_sgpr, 34
-; CHECK: .set foo.private_seg_size, 16
-; CHECK: .set foo.uses_vcc, 1
-; CHECK: .set foo.uses_flat_scratch, 0
-; CHECK: .set foo.has_dyn_sized_stack, 0
-; CHECK: .set foo.has_recursion, 1
-; CHECK: .set foo.has_indirect_call, 0
-
-define void @foo() {
-entry:
- call void @bar()
- ret void
-}
-
-define void @bar() {
-entry:
- call void @baz()
- ret void
-}
-
-define void @baz() {
-entry:
- call void @qux()
- ret void
-}
-
-define void @qux() {
-entry:
- call void @foo()
- ret void
-}
-
-; CHECK-LABEL: {{^}}usefoo
-; CHECK: .set usefoo.num_vgpr, 32
-; CHECK: .set usefoo.num_agpr, 0
-; CHECK: .set usefoo.numbered_sgpr, 33
-; CHECK: .set usefoo.private_seg_size, 0
-; CHECK: .set usefoo.uses_vcc, 1
-; CHECK: .set usefoo.uses_flat_scratch, 1
-; CHECK: .set usefoo.has_dyn_sized_stack, 0
-; CHECK: .set usefoo.has_recursion, 1
-; CHECK: .set usefoo.has_indirect_call, 0
-define amdgpu_kernel void @usefoo() {
- call void @foo()
- ret void
-}
-
diff --git a/llvm/test/CodeGen/AMDGPU/recursive-resource-usage-mcexpr.ll b/llvm/test/CodeGen/AMDGPU/recursive-resource-usage-mcexpr.ll
new file mode 100644
index 00000000000000..7e1090afc0cf1a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/recursive-resource-usage-mcexpr.ll
@@ -0,0 +1,85 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}qux
+; CHECK: .set qux.num_vgpr, max(41, foo.num_vgpr)
+; CHECK: .set qux.num_agpr, max(0, foo.num_agpr)
+; CHECK: .set qux.numbered_sgpr, max(34, foo.numbered_sgpr)
+; CHECK: .set qux.private_seg_size, 16
+; CHECK: .set qux.uses_vcc, or(1, foo.uses_vcc)
+; CHECK: .set qux.uses_flat_scratch, or(0, foo.uses_flat_scratch)
+; CHECK: .set qux.has_dyn_sized_stack, or(0, foo.has_dyn_sized_stack)
+; CHECK: .set qux.has_recursion, or(1, foo.has_recursion)
+; CHECK: .set qux.has_indirect_call, or(0, foo.has_indirect_call)
+
+; CHECK-LABEL: {{^}}baz
+; CHECK: .set baz.num_vgpr, max(42, qux.num_vgpr)
+; CHECK: .set baz.num_agpr, max(0, qux.num_agpr)
+; CHECK: .set baz.numbered_sgpr, max(34, qux.numbered_sgpr)
+; CHECK: .set baz.private_seg_size, 16+(max(qux.private_seg_size))
+; CHECK: .set baz.uses_vcc, or(1, qux.uses_vcc)
+; CHECK: .set baz.uses_flat_scratch, or(0, qux.uses_flat_scratch)
+; CHECK: .set baz.has_dyn_sized_stack, or(0, qux.has_dyn_sized_stack)
+; CHECK: .set baz.has_recursion, or(1, qux.has_recursion)
+; CHECK: .set baz.has_indirect_call, or(0, qux.has_indirect_call)
+
+; CHECK-LABEL: {{^}}bar
+; CHECK: .set bar.num_vgpr, max(42, baz.num_vgpr)
+; CHECK: .set bar.num_agpr, max(0, baz.num_agpr)
+; CHECK: .set bar.numbered_sgpr, max(34, baz.numbered_sgpr)
+; CHECK: .set bar.private_seg_size, 16+(max(baz.private_seg_size))
+; CHECK: .set bar.uses_vcc, or(1, baz.uses_vcc)
+; CHECK: .set bar.uses_flat_scratch, or(0, baz.uses_flat_scratch)
+; CHECK: .set bar.has_dyn_sized_stack, or(0, baz.has_dyn_sized_stack)
+; CHECK: .set bar.has_recursion, or(1, baz.has_recursion)
+; CHECK: .set bar.has_indirect_call, or(0, baz.has_indirect_call)
+
+; CHECK-LABEL: {{^}}foo
+; CHECK: .set foo.num_vgpr, 42
+; CHECK: .set foo.num_agpr, 0
+; CHECK: .set foo.numbered_sgpr, 34
+; CHECK: .set foo.private_seg_size, 16
+; CHECK: .set foo.uses_vcc, 1
+; CHECK: .set foo.uses_flat_scratch, 0
+; CHECK: .set foo.has_dyn_sized_stack, 0
+; CHECK: .set foo.has_recursion, 1
+; CHECK: .set foo.has_indirect_call, 0
+
+define void @foo() {
+entry:
+ call void @bar()
+ ret void
+}
+
+define void @bar() {
+entry:
+ call void @baz()
+ ret void
+}
+
+define void @baz() {
+entry:
+ call void @qux()
+ ret void
+}
+
+define void @qux() {
+entry:
+ call void @foo()
+ ret void
+}
+
+; CHECK-LABEL: {{^}}usefoo
+; CHECK: .set usefoo.num_vgpr, max(32, foo.num_vgpr)
+; CHECK: .set usefoo.num_agpr, max(0, foo.num_agpr)
+; CHECK: .set usefoo.numbered_sgpr, max(33, foo.numbered_sgpr)
+; CHECK: .set usefoo.private_seg_size, 0+(max(foo.private_seg_size))
+; CHECK: .set usefoo.uses_vcc, or(1, foo.uses_vcc)
+; CHECK: .set usefoo.uses_flat_scratch, or(1, foo.uses_flat_scratch)
+; CHECK: .set usefoo.has_dyn_sized_stack, or(0, foo.has_dyn_sized_stack)
+; CHECK: .set usefoo.has_recursion, or(1, foo.has_recursion)
+; CHECK: .set usefoo.has_indirect_call, or(0, foo.has_indirect_call)
+define amdgpu_kernel void @usefoo() {
+ call void @foo()
+ ret void
+}
+
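In this test the cycle foo -> bar -> baz -> qux -> foo is broken exactly once: by the time foo's own symbols are assigned, bar's values already reach back to foo, so the reference to bar is rejected and foo keeps plain constants. Every other edge survives, and the remaining chain resolves to finite values. A hand-rolled evaluation of the num_vgpr chain from the CHECK lines above (ordinary arithmetic, not how MC actually resolves the symbols):

#include <algorithm>
#include <iostream>

int main() {
  int foo = 42;                   // foo.num_vgpr: constant, cycle broken here
  int qux = std::max(41, foo);    // 42
  int baz = std::max(42, qux);    // 42
  int bar = std::max(42, baz);    // 42
  int usefoo = std::max(32, foo); // 42
  std::cout << bar << " " << usefoo << "\n"; // 42 42
}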
diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
index 4639bf4a678d49..5536a09538e6ee 100644
--- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
@@ -468,5 +468,5 @@ entry:
ret <2 x i64> %ret
}
-attributes #0 = { nounwind norecurse }
-attributes #1 = { nounwind noinline norecurse "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+attributes #0 = { nounwind }
+attributes #1 = { nounwind noinline "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
From 2dc357cfdf06d4ecc6b26dcfde64f3bf0009fa79 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Wed, 9 Oct 2024 13:11:16 +0100
Subject: [PATCH 3/6] Comments, Visited set with assert to make sure no
recursion exists in the expression
---
.../Target/AMDGPU/AMDGPUMCResourceInfo.cpp | 22 ++++++++++++++-----
1 file changed, 16 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
index 4e88f3f36a3104..0919c0b2a547bc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
@@ -91,8 +91,17 @@ MCSymbol *MCResourceInfo::getMaxSGPRSymbol(MCContext &OutContext) {
return OutContext.getOrCreateSymbol("amdgpu.max_num_sgpr");
}
+// The (partially complete) expression should have no recursion in it. After
+// all, we're trying to avoid recursion using this codepath.
static bool findSymbolInExpr(MCSymbol *Sym, const MCExpr *Expr,
- SmallVectorImpl<const MCExpr *> &Exprs) {
+ SmallVectorImpl<const MCExpr *> &Exprs,
+ SmallPtrSetImpl<const MCExpr *> &Visited) {
+ // Assert if any of the expressions is already visited (i.e., there is
+ // existing recursion).
+ assert(!Visited.contains(Expr) &&
+ "Expr should not exist in Visited as we're avoiding recursion");
+ Visited.insert(Expr);
+
switch (Expr->getKind()) {
default:
return false;
@@ -107,17 +116,17 @@ static bool findSymbolInExpr(MCSymbol *Sym, const MCExpr *Expr,
}
case MCExpr::ExprKind::Binary: {
const MCBinaryExpr *BExpr = cast<MCBinaryExpr>(Expr);
- return findSymbolInExpr(Sym, BExpr->getLHS(), Exprs) ||
- findSymbolInExpr(Sym, BExpr->getRHS(), Exprs);
+ return findSymbolInExpr(Sym, BExpr->getLHS(), Exprs, Visited) ||
+ findSymbolInExpr(Sym, BExpr->getRHS(), Exprs, Visited);
}
case MCExpr::ExprKind::Unary: {
const MCUnaryExpr *UExpr = cast<MCUnaryExpr>(Expr);
- return findSymbolInExpr(Sym, UExpr->getSubExpr(), Exprs);
+ return findSymbolInExpr(Sym, UExpr->getSubExpr(), Exprs, Visited);
}
case MCExpr::ExprKind::Target: {
const AMDGPUMCExpr *AGVK = cast<AMDGPUMCExpr>(Expr);
for (const MCExpr *E : AGVK->getArgs()) {
- if (findSymbolInExpr(Sym, E, Exprs))
+ if (findSymbolInExpr(Sym, E, Exprs, Visited))
return true;
}
return false;
@@ -132,11 +141,12 @@ static bool findSymbolInExpr(MCSymbol *Sym, const MCExpr *Expr,
// contains the symbol Expr is associated with.
static bool foundRecursiveSymbolDef(MCSymbol *Sym, const MCExpr *Expr) {
SmallVector<const MCExpr *, 8> WorkList;
+ SmallPtrSet<const MCExpr *, 8> Visited;
WorkList.push_back(Expr);
while (!WorkList.empty()) {
const MCExpr *CurExpr = WorkList.pop_back_val();
- if (findSymbolInExpr(Sym, CurExpr, WorkList))
+ if (findSymbolInExpr(Sym, CurExpr, WorkList, Visited))
return true;
}
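Patch 3 threads a Visited set through the walk and asserts on any revisit: this code path exists precisely to keep symbol definitions acyclic, so seeing the same expression node twice is treated as a broken invariant rather than a recoverable case. The insert(...).second idiom it relies on, in miniature (std::unordered_set standing in for SmallPtrSet):

#include <cassert>
#include <unordered_set>

int main() {
  std::unordered_set<const void *> Visited;
  int Node = 0;
  // insert() returns {iterator, bool}; .second is false for a duplicate,
  // which is exactly the condition the patch asserts against.
  assert(Visited.insert(&Node).second);  // first visit: inserted
  assert(!Visited.insert(&Node).second); // revisit: rejected
}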
From 10382ead2075d0da1befdf90305bc2f65b0f0196 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Thu, 10 Oct 2024 15:34:37 +0100
Subject: [PATCH 4/6] Feedback, use WorkList instead of recursion,
 llvm_unreachable instead of assert
---
.../Target/AMDGPU/AMDGPUMCResourceInfo.cpp | 19 +++++++++----------
1 file changed, 9 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
index 0919c0b2a547bc..9b2beb44ead9f1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
@@ -98,9 +98,8 @@ static bool findSymbolInExpr(MCSymbol *Sym, const MCExpr *Expr,
SmallPtrSetImpl<const MCExpr *> &Visited) {
// Assert if any of the expressions is already visited (i.e., there is
// existing recursion).
- assert(!Visited.contains(Expr) &&
- "Expr should not exist in Visited as we're avoiding recursion");
- Visited.insert(Expr);
+ if (!Visited.insert(Expr).second)
+ llvm_unreachable("already visited expression");
switch (Expr->getKind()) {
default:
@@ -116,19 +115,19 @@ static bool findSymbolInExpr(MCSymbol *Sym, const MCExpr *Expr,
}
case MCExpr::ExprKind::Binary: {
const MCBinaryExpr *BExpr = cast<MCBinaryExpr>(Expr);
- return findSymbolInExpr(Sym, BExpr->getLHS(), Exprs, Visited) ||
- findSymbolInExpr(Sym, BExpr->getRHS(), Exprs, Visited);
+ Exprs.push_back(BExpr->getLHS());
+ Exprs.push_back(BExpr->getRHS());
+ return false;
}
case MCExpr::ExprKind::Unary: {
const MCUnaryExpr *UExpr = cast<MCUnaryExpr>(Expr);
- return findSymbolInExpr(Sym, UExpr->getSubExpr(), Exprs, Visited);
+ Exprs.push_back(UExpr->getSubExpr());
+ return false;
}
case MCExpr::ExprKind::Target: {
const AMDGPUMCExpr *AGVK = cast<AMDGPUMCExpr>(Expr);
- for (const MCExpr *E : AGVK->getArgs()) {
- if (findSymbolInExpr(Sym, E, Exprs, Visited))
- return true;
- }
+ for (const MCExpr *E : AGVK->getArgs())
+ Exprs.push_back(E);
return false;
}
}
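Patch 4 removes the native recursion from findSymbolInExpr() itself: binary, unary, and target operands are now deferred to the caller's worklist instead of being descended into, so traversal depth is bounded by the heap-allocated worklist rather than the C++ call stack. The shape of that transformation on a generic tree (a sketch, not the MCExpr hierarchy):

#include <iostream>
#include <vector>

struct Node {
  int Value;
  std::vector<const Node *> Children;
};

// Iterative search: children go onto the worklist, mirroring how the patch
// pushes sub-expressions into Exprs and returns instead of recursing.
bool contains(const Node *Root, int Target) {
  std::vector<const Node *> WorkList{Root};
  while (!WorkList.empty()) {
    const Node *N = WorkList.back();
    WorkList.pop_back();
    if (N->Value == Target)
      return true;
    for (const Node *C : N->Children)
      WorkList.push_back(C);
  }
  return false;
}

int main() {
  Node Leaf{7, {}};
  Node Root{1, {&Leaf}};
  std::cout << contains(&Root, 7) << "\n"; // 1
}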
From 454c3174f781fd1b8bd7ca271d319103c9634207 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Thu, 10 Oct 2024 21:41:29 +0100
Subject: [PATCH 5/6] Feedback, comments, merge nested conditional
---
.../Target/AMDGPU/AMDGPUMCResourceInfo.cpp | 27 +++++++------------
1 file changed, 10 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
index 9b2beb44ead9f1..1692e7bbf73409 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
@@ -92,7 +92,8 @@ MCSymbol *MCResourceInfo::getMaxSGPRSymbol(MCContext &OutContext) {
}
// The (partially complete) expression should have no recursion in it. After
-// all, we're trying to avoid recursion using this codepath.
+// all, we're trying to avoid recursion using this codepath. Returns true if
+// Sym is found within Expr without recursing on Expr, false otherwise.
static bool findSymbolInExpr(MCSymbol *Sym, const MCExpr *Expr,
SmallVectorImpl<const MCExpr *> &Exprs,
SmallPtrSetImpl<const MCExpr *> &Visited) {
@@ -137,7 +138,8 @@ static bool findSymbolInExpr(MCSymbol *Sym, const MCExpr *Expr,
// recursive) must be avoided. Do a walk over Expr to see if Sym will occur in
// it. The Expr is an MCExpr given through a callee's equivalent MCSymbol so if
// no recursion is found Sym can be safely assigned to a (sub-)expr which
-// contains the symbol Expr is associated with.
+// contains the symbol Expr is associated with. Returns true if Sym exists
+// in Expr or its sub-expressions, false otherwise.
static bool foundRecursiveSymbolDef(MCSymbol *Sym, const MCExpr *Expr) {
SmallVector<const MCExpr *, 8> WorkList;
SmallPtrSet<const MCExpr *, 8> Visited;
@@ -172,12 +174,8 @@ void MCResourceInfo::assignResourceInfoExpr(
if (!Seen.insert(Callee).second)
continue;
MCSymbol *CalleeValSym = getSymbol(Callee->getName(), RIK, OutContext);
- if (CalleeValSym->isVariable()) {
- if (!foundRecursiveSymbolDef(
- Sym, CalleeValSym->getVariableValue(/*isUsed=*/false))) {
- ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
- }
- } else {
+ bool CalleeIsVar = CalleeValSym->isVariable();
+ if (!CalleeIsVar || (CalleeIsVar && !foundRecursiveSymbolDef(Sym, CalleeValSym->getVariableValue(/*IsUsed=*/false)))) {
ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
}
}
@@ -235,16 +233,11 @@ void MCResourceInfo::gatherResourceInfo(
if (!Seen.insert(Callee).second)
continue;
if (!Callee->isDeclaration()) {
- MCSymbol *calleeValSym =
+ MCSymbol *CalleeValSym =
getSymbol(Callee->getName(), RIK_PrivateSegSize, OutContext);
- if (calleeValSym->isVariable()) {
- if (!foundRecursiveSymbolDef(
- Sym, calleeValSym->getVariableValue(/*isUsed=*/false))) {
- ArgExprs.push_back(
- MCSymbolRefExpr::create(calleeValSym, OutContext));
- }
- } else {
- ArgExprs.push_back(MCSymbolRefExpr::create(calleeValSym, OutContext));
+ bool CalleeIsVar = CalleeValSym->isVariable();
+ if (!CalleeIsVar || (CalleeIsVar && !foundRecursiveSymbolDef(Sym, CalleeValSym->getVariableValue(/*IsUsed=*/false)))) {
+ ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
}
}
}
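Patch 5 collapses the nested check into one condition of the form !CalleeIsVar || (CalleeIsVar && !foundRecursiveSymbolDef(...)). By the absorption law the repeated CalleeIsVar term is redundant: !A || (A && B) is equivalent to !A || B. A two-line exhaustive check of that equivalence:

#include <cassert>

int main() {
  for (bool A : {false, true})
    for (bool B : {false, true})
      assert((!A || (A && B)) == (!A || B)); // the 'A &&' term is redundant
}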
From 6d1ff4542a7851f261cd649506ea6e5166cfe999 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Fri, 11 Oct 2024 12:59:16 +0100
Subject: [PATCH 6/6] Formatting
---
llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
index 1692e7bbf73409..62caec4d6dd716 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
@@ -175,7 +175,10 @@ void MCResourceInfo::assignResourceInfoExpr(
continue;
MCSymbol *CalleeValSym = getSymbol(Callee->getName(), RIK, OutContext);
bool CalleeIsVar = CalleeValSym->isVariable();
- if (!CalleeIsVar || (CalleeIsVar && !foundRecursiveSymbolDef(Sym, CalleeValSym->getVariableValue(/*IsUsed=*/false)))) {
+ if (!CalleeIsVar ||
+ (CalleeIsVar &&
+ !foundRecursiveSymbolDef(
+ Sym, CalleeValSym->getVariableValue(/*IsUsed=*/false)))) {
ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
}
}
@@ -236,7 +239,10 @@ void MCResourceInfo::gatherResourceInfo(
MCSymbol *CalleeValSym =
getSymbol(Callee->getName(), RIK_PrivateSegSize, OutContext);
bool CalleeIsVar = CalleeValSym->isVariable();
- if (!CalleeIsVar || (CalleeIsVar && !foundRecursiveSymbolDef(Sym, CalleeValSym->getVariableValue(/*IsUsed=*/false)))) {
+ if (!CalleeIsVar ||
+ (CalleeIsVar &&
+ !foundRecursiveSymbolDef(
+ Sym, CalleeValSym->getVariableValue(/*IsUsed=*/false)))) {
ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
}
}