[llvm] [AMDGPU] Change scope of resource usage info symbols (PR #114810)
Janek van Oirschot via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 4 07:16:56 PST 2024
https://github.com/JanekvO created https://github.com/llvm/llvm-project/pull/114810
Change scope of resource usage info MC symbols such that they don't end up in the object file (i.e., as `*ABS*`).
I have tried putting them into their own sections and having those sections omitted, but since they're symbolic representations they don't actually end up in any of the sections (let me know if I'm missing something obvious with the section approach, however).
From 4ffe60579bb67ba3084f99957d03df2690e46e00 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Fri, 1 Nov 2024 18:06:35 +0000
Subject: [PATCH] [AMDGPU] Change scope of resource usage info symbols such
that they don't end up in the object file
---
.../Target/AMDGPU/AMDGPUMCResourceInfo.cpp | 17 +-
.../CodeGen/AMDGPU/agpr-register-count.ll | 34 +-
.../amdpal-metadata-agpr-register-count.ll | 6 +-
...-amdgpu-flat-work-group-size-vgpr-limit.ll | 56 +-
.../AMDGPU/call-alias-register-usage-agpr.ll | 18 +-
.../AMDGPU/call-alias-register-usage0.ll | 12 +-
.../AMDGPU/call-alias-register-usage1.ll | 16 +-
.../AMDGPU/call-alias-register-usage2.ll | 16 +-
.../AMDGPU/call-alias-register-usage3.ll | 16 +-
.../AMDGPU/call-graph-register-usage.ll | 30 +-
.../CodeGen/AMDGPU/function-resource-usage.ll | 550 +++++++++---------
llvm/test/CodeGen/AMDGPU/recursion.ll | 32 +-
.../AMDGPU/resource-optimization-remarks.ll | 16 +-
.../AMDGPU/resource-usage-dead-function.ll | 4 +-
14 files changed, 422 insertions(+), 401 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
index da0397fa20bd1b..6d36b516b557ee 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
@@ -16,6 +16,7 @@
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
@@ -24,7 +25,9 @@ using namespace llvm;
MCSymbol *MCResourceInfo::getSymbol(StringRef FuncName, ResourceInfoKind RIK,
MCContext &OutContext) {
auto GOCS = [FuncName, &OutContext](StringRef Suffix) {
- return OutContext.getOrCreateSymbol(FuncName + Twine(Suffix));
+ return OutContext.getOrCreateSymbol(
+ Twine(OutContext.getAsmInfo()->getPrivateGlobalPrefix()) + FuncName +
+ Twine(Suffix));
};
switch (RIK) {
case RIK_NumVGPR:
@@ -80,15 +83,21 @@ void MCResourceInfo::finalize(MCContext &OutContext) {
}
MCSymbol *MCResourceInfo::getMaxVGPRSymbol(MCContext &OutContext) {
- return OutContext.getOrCreateSymbol("amdgpu.max_num_vgpr");
+ StringRef PrivatePrefix = OutContext.getAsmInfo()->getPrivateGlobalPrefix();
+ return OutContext.getOrCreateSymbol(Twine(PrivatePrefix) +
+ "amdgpu.max_num_vgpr");
}
MCSymbol *MCResourceInfo::getMaxAGPRSymbol(MCContext &OutContext) {
- return OutContext.getOrCreateSymbol("amdgpu.max_num_agpr");
+ StringRef PrivatePrefix = OutContext.getAsmInfo()->getPrivateGlobalPrefix();
+ return OutContext.getOrCreateSymbol(Twine(PrivatePrefix) +
+ "amdgpu.max_num_agpr");
}
MCSymbol *MCResourceInfo::getMaxSGPRSymbol(MCContext &OutContext) {
- return OutContext.getOrCreateSymbol("amdgpu.max_num_sgpr");
+ StringRef PrivatePrefix = OutContext.getAsmInfo()->getPrivateGlobalPrefix();
+ return OutContext.getOrCreateSymbol(Twine(PrivatePrefix) +
+ "amdgpu.max_num_sgpr");
}
void MCResourceInfo::assignResourceInfoExpr(
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll b/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
index 0e16ea10c019ac..b1dd4fecab2cb9 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
@@ -154,28 +154,28 @@ bb:
declare void @undef_func()
; GCN-LABEL: {{^}}kernel_call_undef_func:
-; GCN: .amdhsa_next_free_vgpr max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0)
-; GFX90A: .amdhsa_accum_offset ((((((alignto(max(1, kernel_call_undef_func.num_vgpr), 4))/4)-1)&(~65536))&63)+1)*4
-; GCN: .set kernel_call_undef_func.num_vgpr, max(32, amdgpu.max_num_vgpr)
-; GCN: .set kernel_call_undef_func.num_agpr, max(0, amdgpu.max_num_agpr)
-; GCN: NumVgprs: kernel_call_undef_func.num_vgpr
-; GCN: NumAgprs: kernel_call_undef_func.num_agpr
-; GCN: TotalNumVgprs: totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr)
-; GFX908: VGPRBlocks: ((alignto(max(max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0), 1), 4))/4)-1
-; GFX90A: VGPRBlocks: ((alignto(max(max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0), 1), 8))/8)-1
-; GCN: NumVGPRsForWavesPerEU: max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0)
-; GFX90A: AccumOffset: ((((alignto(max(1, kernel_call_undef_func.num_vgpr), 4))/4)-1)+1)*4
-; GFX908: Occupancy: occupancy(10, 4, 256, 8, 10, max(kernel_call_undef_func.numbered_sgpr+(extrasgprs(kernel_call_undef_func.uses_vcc, kernel_call_undef_func.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0))
-; GFX90A: Occupancy: occupancy(8, 8, 512, 8, 8, max(kernel_call_undef_func.numbered_sgpr+(extrasgprs(kernel_call_undef_func.uses_vcc, kernel_call_undef_func.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0))
-; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: ((((alignto(max(1, kernel_call_undef_func.num_vgpr), 4))/4)-1)&(~65536))&63
+; GCN: .amdhsa_next_free_vgpr max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0)
+; GFX90A: .amdhsa_accum_offset ((((((alignto(max(1, .Lkernel_call_undef_func.num_vgpr), 4))/4)-1)&(~65536))&63)+1)*4
+; GCN: .set .Lkernel_call_undef_func.num_vgpr, max(32, .Lamdgpu.max_num_vgpr)
+; GCN: .set .Lkernel_call_undef_func.num_agpr, max(0, .Lamdgpu.max_num_agpr)
+; GCN: NumVgprs: .Lkernel_call_undef_func.num_vgpr
+; GCN: NumAgprs: .Lkernel_call_undef_func.num_agpr
+; GCN: TotalNumVgprs: totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr)
+; GFX908: VGPRBlocks: ((alignto(max(max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0), 1), 4))/4)-1
+; GFX90A: VGPRBlocks: ((alignto(max(max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0), 1), 8))/8)-1
+; GCN: NumVGPRsForWavesPerEU: max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0)
+; GFX90A: AccumOffset: ((((alignto(max(1, .Lkernel_call_undef_func.num_vgpr), 4))/4)-1)+1)*4
+; GFX908: Occupancy: occupancy(10, 4, 256, 8, 10, max(.Lkernel_call_undef_func.numbered_sgpr+(extrasgprs(.Lkernel_call_undef_func.uses_vcc, .Lkernel_call_undef_func.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0))
+; GFX90A: Occupancy: occupancy(8, 8, 512, 8, 8, max(.Lkernel_call_undef_func.numbered_sgpr+(extrasgprs(.Lkernel_call_undef_func.uses_vcc, .Lkernel_call_undef_func.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0))
+; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: ((((alignto(max(1, .Lkernel_call_undef_func.num_vgpr), 4))/4)-1)&(~65536))&63
define amdgpu_kernel void @kernel_call_undef_func() #0 {
bb:
call void @undef_func()
ret void
}
-; GCN: .set amdgpu.max_num_vgpr, 32
-; GCN-NEXT: .set amdgpu.max_num_agpr, 32
-; GCN-NEXT: .set amdgpu.max_num_sgpr, 34
+; GCN: .set .Lamdgpu.max_num_vgpr, 32
+; GCN-NEXT: .set .Lamdgpu.max_num_agpr, 32
+; GCN-NEXT: .set .Lamdgpu.max_num_sgpr, 34
attributes #0 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll b/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll
index 8f4cb364751d88..15284ad45a9261 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll
@@ -60,9 +60,9 @@ bb:
declare void @undef_func()
; CHECK: .type kernel_call_undef_func
-; CHECK: .set kernel_call_undef_func.num_agpr, max(0, amdgpu.max_num_agpr)
-; CHECK: NumAgprs: kernel_call_undef_func.num_agpr
-; CHECK: .set amdgpu.max_num_agpr, 32
+; CHECK: .set .Lkernel_call_undef_func.num_agpr, max(0, .Lamdgpu.max_num_agpr)
+; CHECK: NumAgprs: .Lkernel_call_undef_func.num_agpr
+; CHECK: .set .Lamdgpu.max_num_agpr, 32
define amdgpu_kernel void @kernel_call_undef_func() #0 {
bb:
call void @undef_func()
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll
index d45e116beb4e3e..374fd32ec5997f 100644
--- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll
@@ -547,20 +547,20 @@ define amdgpu_kernel void @f256() #256 {
attributes #256 = { nounwind "amdgpu-flat-work-group-size"="256,256" }
; GCN-LABEL: {{^}}f512:
-; GFX9: .set f512.num_vgpr, max(128, amdgpu.max_num_vgpr)
-; GFX90A: .set f512.num_vgpr, max(128, amdgpu.max_num_vgpr)
-; GFX90A: .set f512.num_agpr, max(128, amdgpu.max_num_agpr)
-; GFX10WGP-WAVE32: .set f512.num_vgpr, max(256, amdgpu.max_num_vgpr)
-; GFX10WGP-WAVE64: .set f512.num_vgpr, max(256, amdgpu.max_num_vgpr)
-; GFX10CU-WAVE32: .set f512.num_vgpr, max(128, amdgpu.max_num_vgpr)
-; GFX10CU-WAVE64: .set f512.num_vgpr, max(128, amdgpu.max_num_vgpr)
-; GFX11WGP-WAVE32: .set f512.num_vgpr, max(256, amdgpu.max_num_vgpr)
-; GFX11WGP-WAVE64: .set f512.num_vgpr, max(256, amdgpu.max_num_vgpr)
-; GFX11CU-WAVE32: .set f512.num_vgpr, max(192, amdgpu.max_num_vgpr)
-; GFX11CU-WAVE64: .set f512.num_vgpr, max(192, amdgpu.max_num_vgpr)
-; GCN: NumVgprs: f512.num_vgpr
-; GFX90A: NumAgprs: f512.num_agpr
-; GFX90A: TotalNumVgprs: totalnumvgprs(f512.num_agpr, f512.num_vgpr)
+; GFX9: .set .Lf512.num_vgpr, max(128, .Lamdgpu.max_num_vgpr)
+; GFX90A: .set .Lf512.num_vgpr, max(128, .Lamdgpu.max_num_vgpr)
+; GFX90A: .set .Lf512.num_agpr, max(128, .Lamdgpu.max_num_agpr)
+; GFX10WGP-WAVE32: .set .Lf512.num_vgpr, max(256, .Lamdgpu.max_num_vgpr)
+; GFX10WGP-WAVE64: .set .Lf512.num_vgpr, max(256, .Lamdgpu.max_num_vgpr)
+; GFX10CU-WAVE32: .set .Lf512.num_vgpr, max(128, .Lamdgpu.max_num_vgpr)
+; GFX10CU-WAVE64: .set .Lf512.num_vgpr, max(128, .Lamdgpu.max_num_vgpr)
+; GFX11WGP-WAVE32: .set .Lf512.num_vgpr, max(256, .Lamdgpu.max_num_vgpr)
+; GFX11WGP-WAVE64: .set .Lf512.num_vgpr, max(256, .Lamdgpu.max_num_vgpr)
+; GFX11CU-WAVE32: .set .Lf512.num_vgpr, max(192, .Lamdgpu.max_num_vgpr)
+; GFX11CU-WAVE64: .set .Lf512.num_vgpr, max(192, .Lamdgpu.max_num_vgpr)
+; GCN: NumVgprs: .Lf512.num_vgpr
+; GFX90A: NumAgprs: .Lf512.num_agpr
+; GFX90A: TotalNumVgprs: totalnumvgprs(.Lf512.num_agpr, .Lf512.num_vgpr)
define amdgpu_kernel void @f512() #512 {
call void @foo()
call void @use256vgprs()
@@ -569,20 +569,20 @@ define amdgpu_kernel void @f512() #512 {
attributes #512 = { nounwind "amdgpu-flat-work-group-size"="512,512" }
; GCN-LABEL: {{^}}f1024:
-; GFX9: .set f1024.num_vgpr, max(64, amdgpu.max_num_vgpr)
-; GFX90A: .set f1024.num_vgpr, max(64, amdgpu.max_num_vgpr)
-; GFX90A: .set f1024.num_agpr, max(64, amdgpu.max_num_agpr)
-; GFX10WGP-WAVE32: .set f1024.num_vgpr, max(128, amdgpu.max_num_vgpr)
-; GFX10WGP-WAVE64: .set f1024.num_vgpr, max(128, amdgpu.max_num_vgpr)
-; GFX10CU-WAVE32: .set f1024.num_vgpr, max(64, amdgpu.max_num_vgpr)
-; GFX10CU-WAVE64: .set f1024.num_vgpr, max(64, amdgpu.max_num_vgpr)
-; GFX11WGP-WAVE32: .set f1024.num_vgpr, max(192, amdgpu.max_num_vgpr)
-; GFX11WGP-WAVE64: .set f1024.num_vgpr, max(192, amdgpu.max_num_vgpr)
-; GFX11CU-WAVE32: .set f1024.num_vgpr, max(96, amdgpu.max_num_vgpr)
-; GFX11CU-WAVE64: .set f1024.num_vgpr, max(96, amdgpu.max_num_vgpr)
-; GCN: NumVgprs: f1024.num_vgpr
-; GFX90A: NumAgprs: f1024.num_agpr
-; GFX90A: TotalNumVgprs: totalnumvgprs(f1024.num_agpr, f1024.num_vgpr)
+; GFX9: .set .Lf1024.num_vgpr, max(64, .Lamdgpu.max_num_vgpr)
+; GFX90A: .set .Lf1024.num_vgpr, max(64, .Lamdgpu.max_num_vgpr)
+; GFX90A: .set .Lf1024.num_agpr, max(64, .Lamdgpu.max_num_agpr)
+; GFX10WGP-WAVE32: .set .Lf1024.num_vgpr, max(128, .Lamdgpu.max_num_vgpr)
+; GFX10WGP-WAVE64: .set .Lf1024.num_vgpr, max(128, .Lamdgpu.max_num_vgpr)
+; GFX10CU-WAVE32: .set .Lf1024.num_vgpr, max(64, .Lamdgpu.max_num_vgpr)
+; GFX10CU-WAVE64: .set .Lf1024.num_vgpr, max(64, .Lamdgpu.max_num_vgpr)
+; GFX11WGP-WAVE32: .set .Lf1024.num_vgpr, max(192, .Lamdgpu.max_num_vgpr)
+; GFX11WGP-WAVE64: .set .Lf1024.num_vgpr, max(192, .Lamdgpu.max_num_vgpr)
+; GFX11CU-WAVE32: .set .Lf1024.num_vgpr, max(96, .Lamdgpu.max_num_vgpr)
+; GFX11CU-WAVE64: .set .Lf1024.num_vgpr, max(96, .Lamdgpu.max_num_vgpr)
+; GCN: NumVgprs: .Lf1024.num_vgpr
+; GFX90A: NumAgprs: .Lf1024.num_agpr
+; GFX90A: TotalNumVgprs: totalnumvgprs(.Lf1024.num_agpr, .Lf1024.num_vgpr)
define amdgpu_kernel void @f1024() #1024 {
call void @foo()
call void @use256vgprs()
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
index e8898d6a7001cc..8bc8a7182ff727 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
@@ -8,13 +8,13 @@
@alias = hidden alias void (), ptr @aliasee_default
; ALL-LABEL: {{^}}kernel:
-; ALL: .amdhsa_next_free_vgpr max(totalnumvgprs(kernel.num_agpr, kernel.num_vgpr), 1, 0)
-; ALL-NEXT: .amdhsa_next_free_sgpr (max(kernel.numbered_sgpr+(extrasgprs(kernel.uses_vcc, kernel.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(kernel.uses_vcc, kernel.uses_flat_scratch, 1))
-; GFX90A-NEXT: .amdhsa_accum_offset ((((((alignto(max(1, kernel.num_vgpr), 4))/4)-1)&(~65536))&63)+1)*4
+; ALL: .amdhsa_next_free_vgpr max(totalnumvgprs(.Lkernel.num_agpr, .Lkernel.num_vgpr), 1, 0)
+; ALL-NEXT: .amdhsa_next_free_sgpr (max(.Lkernel.numbered_sgpr+(extrasgprs(.Lkernel.uses_vcc, .Lkernel.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(.Lkernel.uses_vcc, .Lkernel.uses_flat_scratch, 1))
+; GFX90A-NEXT: .amdhsa_accum_offset ((((((alignto(max(1, .Lkernel.num_vgpr), 4))/4)-1)&(~65536))&63)+1)*4
-; ALL: .set kernel.num_vgpr, max(32, aliasee_default.num_vgpr)
-; ALL-NEXT: .set kernel.num_agpr, max(0, aliasee_default.num_agpr)
-; ALL-NEXT: .set kernel.numbered_sgpr, max(33, aliasee_default.numbered_sgpr)
+; ALL: .set .Lkernel.num_vgpr, max(32, .Laliasee_default.num_vgpr)
+; ALL-NEXT: .set .Lkernel.num_agpr, max(0, .Laliasee_default.num_agpr)
+; ALL-NEXT: .set .Lkernel.numbered_sgpr, max(33, .Laliasee_default.numbered_sgpr)
define amdgpu_kernel void @kernel() #0 {
bb:
call void @alias() #2
@@ -26,9 +26,9 @@ bb:
call void asm sideeffect "; clobber a26 ", "~{a26}"()
ret void
}
-; ALL: .set aliasee_default.num_vgpr, 0
-; ALL-NEXT: .set aliasee_default.num_agpr, 27
-; ALL-NEXT: .set aliasee_default.numbered_sgpr, 32
+; ALL: .set .Laliasee_default.num_vgpr, 0
+; ALL-NEXT: .set .Laliasee_default.num_agpr, 27
+; ALL-NEXT: .set .Laliasee_default.numbered_sgpr, 32
attributes #0 = { noinline norecurse nounwind optnone }
attributes #1 = { noinline norecurse nounwind readnone willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll
index a01268625cedbd..deb0973d37e3dc 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll
@@ -7,18 +7,18 @@
@alias0 = hidden alias void (), ptr @aliasee_default_vgpr64_sgpr102
; CHECK-LABEL: {{^}}kernel0:
-; CHECK: .set kernel0.num_vgpr, max(32, aliasee_default_vgpr64_sgpr102.num_vgpr)
-; CHECK-NEXT: .set kernel0.num_agpr, max(0, aliasee_default_vgpr64_sgpr102.num_agpr)
-; CHECK-NEXT: .set kernel0.numbered_sgpr, max(33, aliasee_default_vgpr64_sgpr102.numbered_sgpr)
+; CHECK: .set .Lkernel0.num_vgpr, max(32, .Laliasee_default_vgpr64_sgpr102.num_vgpr)
+; CHECK-NEXT: .set .Lkernel0.num_agpr, max(0, .Laliasee_default_vgpr64_sgpr102.num_agpr)
+; CHECK-NEXT: .set .Lkernel0.numbered_sgpr, max(33, .Laliasee_default_vgpr64_sgpr102.numbered_sgpr)
define amdgpu_kernel void @kernel0() #0 {
bb:
call void @alias0() #2
ret void
}
-; CHECK: .set aliasee_default_vgpr64_sgpr102.num_vgpr, 53
-; CHECK-NEXT: .set aliasee_default_vgpr64_sgpr102.num_agpr, 0
-; CHECK-NEXT: .set aliasee_default_vgpr64_sgpr102.numbered_sgpr, 32
+; CHECK: .set .Laliasee_default_vgpr64_sgpr102.num_vgpr, 53
+; CHECK-NEXT: .set .Laliasee_default_vgpr64_sgpr102.num_agpr, 0
+; CHECK-NEXT: .set .Laliasee_default_vgpr64_sgpr102.numbered_sgpr, 32
define internal void @aliasee_default_vgpr64_sgpr102() #1 {
bb:
call void asm sideeffect "; clobber v52 ", "~{v52}"()
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
index 86defe3ba7ec08..eeaf04aacead08 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
@@ -9,12 +9,12 @@
; The parent kernel has a higher VGPR usage than the possible callees.
; CHECK-LABEL: {{^}}kernel1:
-; CHECK: .amdhsa_next_free_vgpr max(totalnumvgprs(kernel1.num_agpr, kernel1.num_vgpr), 1, 0)
-; CHECK-NEXT: .amdhsa_next_free_sgpr (max(kernel1.numbered_sgpr+(extrasgprs(kernel1.uses_vcc, kernel1.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(kernel1.uses_vcc, kernel1.uses_flat_scratch, 1))
+; CHECK: .amdhsa_next_free_vgpr max(totalnumvgprs(.Lkernel1.num_agpr, .Lkernel1.num_vgpr), 1, 0)
+; CHECK-NEXT: .amdhsa_next_free_sgpr (max(.Lkernel1.numbered_sgpr+(extrasgprs(.Lkernel1.uses_vcc, .Lkernel1.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(.Lkernel1.uses_vcc, .Lkernel1.uses_flat_scratch, 1))
-; CHECK: .set kernel1.num_vgpr, max(41, aliasee_vgpr32_sgpr76.num_vgpr)
-; CHECK-NEXT: .set kernel1.num_agpr, max(0, aliasee_vgpr32_sgpr76.num_agpr)
-; CHECK-NEXT: .set kernel1.numbered_sgpr, max(33, aliasee_vgpr32_sgpr76.numbered_sgpr)
+; CHECK: .set .Lkernel1.num_vgpr, max(41, .Laliasee_vgpr32_sgpr76.num_vgpr)
+; CHECK-NEXT: .set .Lkernel1.num_agpr, max(0, .Laliasee_vgpr32_sgpr76.num_agpr)
+; CHECK-NEXT: .set .Lkernel1.numbered_sgpr, max(33, .Laliasee_vgpr32_sgpr76.numbered_sgpr)
define amdgpu_kernel void @kernel1() #0 {
bb:
call void asm sideeffect "; clobber v40 ", "~{v40}"()
@@ -22,9 +22,9 @@ bb:
ret void
}
-; CHECK: .set aliasee_vgpr32_sgpr76.num_vgpr, 27
-; CHECK-NEXT: .set aliasee_vgpr32_sgpr76.num_agpr, 0
-; CHECK-NEXT: .set aliasee_vgpr32_sgpr76.numbered_sgpr, 32
+; CHECK: .set .Laliasee_vgpr32_sgpr76.num_vgpr, 27
+; CHECK-NEXT: .set .Laliasee_vgpr32_sgpr76.num_agpr, 0
+; CHECK-NEXT: .set .Laliasee_vgpr32_sgpr76.numbered_sgpr, 32
define internal void @aliasee_vgpr32_sgpr76() #1 {
bb:
call void asm sideeffect "; clobber v26 ", "~{v26}"()
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll
index 6b1fbd9b6e16a2..f8fb4a79768b73 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll
@@ -7,21 +7,21 @@
@alias2 = hidden alias void (), ptr @aliasee_vgpr64_sgpr102
; CHECK-LABEL: {{^}}kernel2:
-; CHECK: .amdhsa_next_free_vgpr max(totalnumvgprs(kernel2.num_agpr, kernel2.num_vgpr), 1, 0)
-; CHECK-NEXT: .amdhsa_next_free_sgpr (max(kernel2.numbered_sgpr+(extrasgprs(kernel2.uses_vcc, kernel2.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(kernel2.uses_vcc, kernel2.uses_flat_scratch, 1))
+; CHECK: .amdhsa_next_free_vgpr max(totalnumvgprs(.Lkernel2.num_agpr, .Lkernel2.num_vgpr), 1, 0)
+; CHECK-NEXT: .amdhsa_next_free_sgpr (max(.Lkernel2.numbered_sgpr+(extrasgprs(.Lkernel2.uses_vcc, .Lkernel2.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(.Lkernel2.uses_vcc, .Lkernel2.uses_flat_scratch, 1))
-; CHECK: .set kernel2.num_vgpr, max(32, aliasee_vgpr64_sgpr102.num_vgpr)
-; CHECK-NEXT: .set kernel2.num_agpr, max(0, aliasee_vgpr64_sgpr102.num_agpr)
-; CHECK-NEXT: .set kernel2.numbered_sgpr, max(33, aliasee_vgpr64_sgpr102.numbered_sgpr)
+; CHECK: .set .Lkernel2.num_vgpr, max(32, .Laliasee_vgpr64_sgpr102.num_vgpr)
+; CHECK-NEXT: .set .Lkernel2.num_agpr, max(0, .Laliasee_vgpr64_sgpr102.num_agpr)
+; CHECK-NEXT: .set .Lkernel2.numbered_sgpr, max(33, .Laliasee_vgpr64_sgpr102.numbered_sgpr)
define amdgpu_kernel void @kernel2() #0 {
bb:
call void @alias2() #2
ret void
}
-; CHECK: .set aliasee_vgpr64_sgpr102.num_vgpr, 53
-; CHECK-NEXT: .set aliasee_vgpr64_sgpr102.num_agpr, 0
-; CHECK-NEXT: .set aliasee_vgpr64_sgpr102.numbered_sgpr, 32
+; CHECK: .set .Laliasee_vgpr64_sgpr102.num_vgpr, 53
+; CHECK-NEXT: .set .Laliasee_vgpr64_sgpr102.num_agpr, 0
+; CHECK-NEXT: .set .Laliasee_vgpr64_sgpr102.numbered_sgpr, 32
define internal void @aliasee_vgpr64_sgpr102() #1 {
bb:
call void asm sideeffect "; clobber v52 ", "~{v52}"()
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll
index c81181cd826677..f6ec9ba34d7edb 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll
@@ -7,21 +7,21 @@
@alias3 = hidden alias void (), ptr @aliasee_vgpr256_sgpr102
; CHECK-LABEL: {{^}}kernel3:
-; CHECK: .amdhsa_next_free_vgpr max(totalnumvgprs(kernel3.num_agpr, kernel3.num_vgpr), 1, 0)
-; CHECK-NEXT: .amdhsa_next_free_sgpr (max(kernel3.numbered_sgpr+(extrasgprs(kernel3.uses_vcc, kernel3.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(kernel3.uses_vcc, kernel3.uses_flat_scratch, 1))
+; CHECK: .amdhsa_next_free_vgpr max(totalnumvgprs(.Lkernel3.num_agpr, .Lkernel3.num_vgpr), 1, 0)
+; CHECK-NEXT: .amdhsa_next_free_sgpr (max(.Lkernel3.numbered_sgpr+(extrasgprs(.Lkernel3.uses_vcc, .Lkernel3.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(.Lkernel3.uses_vcc, .Lkernel3.uses_flat_scratch, 1))
-; CHECK: .set kernel3.num_vgpr, max(32, aliasee_vgpr256_sgpr102.num_vgpr)
-; CHECK-NEXT: .set kernel3.num_agpr, max(0, aliasee_vgpr256_sgpr102.num_agpr)
-; CHECK-NEXT: .set kernel3.numbered_sgpr, max(33, aliasee_vgpr256_sgpr102.numbered_sgpr)
+; CHECK: .set .Lkernel3.num_vgpr, max(32, .Laliasee_vgpr256_sgpr102.num_vgpr)
+; CHECK-NEXT: .set .Lkernel3.num_agpr, max(0, .Laliasee_vgpr256_sgpr102.num_agpr)
+; CHECK-NEXT: .set .Lkernel3.numbered_sgpr, max(33, .Laliasee_vgpr256_sgpr102.numbered_sgpr)
define amdgpu_kernel void @kernel3() #0 {
bb:
call void @alias3() #2
ret void
}
-; CHECK: .set aliasee_vgpr256_sgpr102.num_vgpr, 253
-; CHECK-NEXT: .set aliasee_vgpr256_sgpr102.num_agpr, 0
-; CHECK-NEXT: .set aliasee_vgpr256_sgpr102.numbered_sgpr, 33
+; CHECK: .set .Laliasee_vgpr256_sgpr102.num_vgpr, 253
+; CHECK-NEXT: .set .Laliasee_vgpr256_sgpr102.num_agpr, 0
+; CHECK-NEXT: .set .Laliasee_vgpr256_sgpr102.numbered_sgpr, 33
define internal void @aliasee_vgpr256_sgpr102() #1 {
bb:
call void asm sideeffect "; clobber v252 ", "~{v252}"()
diff --git a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
index dbd00f09943c01..61450ab655b86a 100644
--- a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
@@ -234,11 +234,11 @@ define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
; Make sure there's no assert when a sgpr96 is used.
; GCN-LABEL: {{^}}count_use_sgpr96_external_call
; GCN: ; sgpr96 s[{{[0-9]+}}:{{[0-9]+}}]
-; GCN: .set count_use_sgpr96_external_call.num_vgpr, max(0, amdgpu.max_num_vgpr)
-; GCN: .set count_use_sgpr96_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
-; CI: TotalNumSgprs: count_use_sgpr96_external_call.numbered_sgpr+4
+; GCN: .set .Lcount_use_sgpr96_external_call.num_vgpr, max(0, .Lamdgpu.max_num_vgpr)
+; GCN: .set .Lcount_use_sgpr96_external_call.numbered_sgpr, max(33, .Lamdgpu.max_num_sgpr)
+; CI: TotalNumSgprs: .Lcount_use_sgpr96_external_call.numbered_sgpr+4
; VI-BUG: TotalNumSgprs: 96
-; GCN: NumVgprs: count_use_sgpr96_external_call.num_vgpr
+; GCN: NumVgprs: .Lcount_use_sgpr96_external_call.num_vgpr
define amdgpu_kernel void @count_use_sgpr96_external_call() {
entry:
tail call void asm sideeffect "; sgpr96 $0", "s"(<3 x i32> <i32 10, i32 11, i32 12>) #1
@@ -249,11 +249,11 @@ entry:
; Make sure there's no assert when a sgpr160 is used.
; GCN-LABEL: {{^}}count_use_sgpr160_external_call
; GCN: ; sgpr160 s[{{[0-9]+}}:{{[0-9]+}}]
-; GCN: .set count_use_sgpr160_external_call.num_vgpr, max(0, amdgpu.max_num_vgpr)
-; GCN: .set count_use_sgpr160_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
-; CI: TotalNumSgprs: count_use_sgpr160_external_call.numbered_sgpr+4
+; GCN: .set .Lcount_use_sgpr160_external_call.num_vgpr, max(0, .Lamdgpu.max_num_vgpr)
+; GCN: .set .Lcount_use_sgpr160_external_call.numbered_sgpr, max(33, .Lamdgpu.max_num_sgpr)
+; CI: TotalNumSgprs: .Lcount_use_sgpr160_external_call.numbered_sgpr+4
; VI-BUG: TotalNumSgprs: 96
-; GCN: NumVgprs: count_use_sgpr160_external_call.num_vgpr
+; GCN: NumVgprs: .Lcount_use_sgpr160_external_call.num_vgpr
define amdgpu_kernel void @count_use_sgpr160_external_call() {
entry:
tail call void asm sideeffect "; sgpr160 $0", "s"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
@@ -264,11 +264,11 @@ entry:
; Make sure there's no assert when a vgpr160 is used.
; GCN-LABEL: {{^}}count_use_vgpr160_external_call
; GCN: ; vgpr160 v[{{[0-9]+}}:{{[0-9]+}}]
-; GCN: .set count_use_vgpr160_external_call.num_vgpr, max(5, amdgpu.max_num_vgpr)
-; GCN: .set count_use_vgpr160_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
-; CI: TotalNumSgprs: count_use_vgpr160_external_call.numbered_sgpr+4
+; GCN: .set .Lcount_use_vgpr160_external_call.num_vgpr, max(5, .Lamdgpu.max_num_vgpr)
+; GCN: .set .Lcount_use_vgpr160_external_call.numbered_sgpr, max(33, .Lamdgpu.max_num_sgpr)
+; CI: TotalNumSgprs: .Lcount_use_vgpr160_external_call.numbered_sgpr+4
; VI-BUG: TotalNumSgprs: 96
-; GCN: NumVgprs: count_use_vgpr160_external_call.num_vgpr
+; GCN: NumVgprs: .Lcount_use_vgpr160_external_call.num_vgpr
define amdgpu_kernel void @count_use_vgpr160_external_call() {
entry:
tail call void asm sideeffect "; vgpr160 $0", "v"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
@@ -276,9 +276,9 @@ entry:
ret void
}
-; GCN: .set amdgpu.max_num_vgpr, 50
-; GCN: .set amdgpu.max_num_agpr, 0
-; GCN: .set amdgpu.max_num_sgpr, 80
+; GCN: .set .Lamdgpu.max_num_vgpr, 50
+; GCN: .set .Lamdgpu.max_num_agpr, 0
+; GCN: .set .Lamdgpu.max_num_sgpr, 80
; GCN-LABEL: amdhsa.kernels:
; GCN: .name: count_use_sgpr96_external_call
diff --git a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
index d3a6b4e01ebfb8..1d7233b47c0d52 100644
--- a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
@@ -1,17 +1,29 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -filetype=obj < %s > %t
+; RUN: llvm-objdump --syms %t | FileCheck -check-prefix=OBJ %s
+
+; OBJ-NOT: num_vgpr
+; OBJ-NOT: num_agpr
+; OBJ-NOT: numbered_sgpr
+; OBJ-NOT: private_seg_size
+; OBJ-NOT: uses_vcc
+; OBJ-NOT: uses_flat_scratch
+; OBJ-NOT: has_dyn_sized_stack
+; OBJ-NOT: has_recursion
+; OBJ-NOT: has_indirect_call
; Functions that don't make calls should have constants as its resource usage as no resource information has to be propagated.
; GCN-LABEL: {{^}}use_vcc:
-; GCN: .set use_vcc.num_vgpr, 0
-; GCN: .set use_vcc.num_agpr, 0
-; GCN: .set use_vcc.numbered_sgpr, 32
-; GCN: .set use_vcc.private_seg_size, 0
-; GCN: .set use_vcc.uses_vcc, 1
-; GCN: .set use_vcc.uses_flat_scratch, 0
-; GCN: .set use_vcc.has_dyn_sized_stack, 0
-; GCN: .set use_vcc.has_recursion, 0
-; GCN: .set use_vcc.has_indirect_call, 0
+; GCN: .set .Luse_vcc.num_vgpr, 0
+; GCN: .set .Luse_vcc.num_agpr, 0
+; GCN: .set .Luse_vcc.numbered_sgpr, 32
+; GCN: .set .Luse_vcc.private_seg_size, 0
+; GCN: .set .Luse_vcc.uses_vcc, 1
+; GCN: .set .Luse_vcc.uses_flat_scratch, 0
+; GCN: .set .Luse_vcc.has_dyn_sized_stack, 0
+; GCN: .set .Luse_vcc.has_recursion, 0
+; GCN: .set .Luse_vcc.has_indirect_call, 0
; GCN: TotalNumSgprs: 36
; GCN: NumVgprs: 0
; GCN: ScratchSize: 0
@@ -21,15 +33,15 @@ define void @use_vcc() #1 {
}
; GCN-LABEL: {{^}}indirect_use_vcc:
-; GCN: .set indirect_use_vcc.num_vgpr, max(41, use_vcc.num_vgpr)
-; GCN: .set indirect_use_vcc.num_agpr, max(0, use_vcc.num_agpr)
-; GCN: .set indirect_use_vcc.numbered_sgpr, max(34, use_vcc.numbered_sgpr)
-; GCN: .set indirect_use_vcc.private_seg_size, 16+(max(use_vcc.private_seg_size))
-; GCN: .set indirect_use_vcc.uses_vcc, or(1, use_vcc.uses_vcc)
-; GCN: .set indirect_use_vcc.uses_flat_scratch, or(0, use_vcc.uses_flat_scratch)
-; GCN: .set indirect_use_vcc.has_dyn_sized_stack, or(0, use_vcc.has_dyn_sized_stack)
-; GCN: .set indirect_use_vcc.has_recursion, or(0, use_vcc.has_recursion)
-; GCN: .set indirect_use_vcc.has_indirect_call, or(0, use_vcc.has_indirect_call)
+; GCN: .set .Lindirect_use_vcc.num_vgpr, max(41, .Luse_vcc.num_vgpr)
+; GCN: .set .Lindirect_use_vcc.num_agpr, max(0, .Luse_vcc.num_agpr)
+; GCN: .set .Lindirect_use_vcc.numbered_sgpr, max(34, .Luse_vcc.numbered_sgpr)
+; GCN: .set .Lindirect_use_vcc.private_seg_size, 16+(max(.Luse_vcc.private_seg_size))
+; GCN: .set .Lindirect_use_vcc.uses_vcc, or(1, .Luse_vcc.uses_vcc)
+; GCN: .set .Lindirect_use_vcc.uses_flat_scratch, or(0, .Luse_vcc.uses_flat_scratch)
+; GCN: .set .Lindirect_use_vcc.has_dyn_sized_stack, or(0, .Luse_vcc.has_dyn_sized_stack)
+; GCN: .set .Lindirect_use_vcc.has_recursion, or(0, .Luse_vcc.has_recursion)
+; GCN: .set .Lindirect_use_vcc.has_indirect_call, or(0, .Luse_vcc.has_indirect_call)
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
@@ -39,15 +51,15 @@ define void @indirect_use_vcc() #1 {
}
; GCN-LABEL: {{^}}indirect_2level_use_vcc_kernel:
-; GCN: .set indirect_2level_use_vcc_kernel.num_vgpr, max(32, indirect_use_vcc.num_vgpr)
-; GCN: .set indirect_2level_use_vcc_kernel.num_agpr, max(0, indirect_use_vcc.num_agpr)
-; GCN: .set indirect_2level_use_vcc_kernel.numbered_sgpr, max(33, indirect_use_vcc.numbered_sgpr)
-; GCN: .set indirect_2level_use_vcc_kernel.private_seg_size, 0+(max(indirect_use_vcc.private_seg_size))
-; GCN: .set indirect_2level_use_vcc_kernel.uses_vcc, or(1, indirect_use_vcc.uses_vcc)
-; GCN: .set indirect_2level_use_vcc_kernel.uses_flat_scratch, or(1, indirect_use_vcc.uses_flat_scratch)
-; GCN: .set indirect_2level_use_vcc_kernel.has_dyn_sized_stack, or(0, indirect_use_vcc.has_dyn_sized_stack)
-; GCN: .set indirect_2level_use_vcc_kernel.has_recursion, or(0, indirect_use_vcc.has_recursion)
-; GCN: .set indirect_2level_use_vcc_kernel.has_indirect_call, or(0, indirect_use_vcc.has_indirect_call)
+; GCN: .set .Lindirect_2level_use_vcc_kernel.num_vgpr, max(32, .Lindirect_use_vcc.num_vgpr)
+; GCN: .set .Lindirect_2level_use_vcc_kernel.num_agpr, max(0, .Lindirect_use_vcc.num_agpr)
+; GCN: .set .Lindirect_2level_use_vcc_kernel.numbered_sgpr, max(33, .Lindirect_use_vcc.numbered_sgpr)
+; GCN: .set .Lindirect_2level_use_vcc_kernel.private_seg_size, 0+(max(.Lindirect_use_vcc.private_seg_size))
+; GCN: .set .Lindirect_2level_use_vcc_kernel.uses_vcc, or(1, .Lindirect_use_vcc.uses_vcc)
+; GCN: .set .Lindirect_2level_use_vcc_kernel.uses_flat_scratch, or(1, .Lindirect_use_vcc.uses_flat_scratch)
+; GCN: .set .Lindirect_2level_use_vcc_kernel.has_dyn_sized_stack, or(0, .Lindirect_use_vcc.has_dyn_sized_stack)
+; GCN: .set .Lindirect_2level_use_vcc_kernel.has_recursion, or(0, .Lindirect_use_vcc.has_recursion)
+; GCN: .set .Lindirect_2level_use_vcc_kernel.has_indirect_call, or(0, .Lindirect_use_vcc.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
@@ -57,15 +69,15 @@ define amdgpu_kernel void @indirect_2level_use_vcc_kernel(ptr addrspace(1) %out)
}
; GCN-LABEL: {{^}}use_flat_scratch:
-; GCN: .set use_flat_scratch.num_vgpr, 0
-; GCN: .set use_flat_scratch.num_agpr, 0
-; GCN: .set use_flat_scratch.numbered_sgpr, 32
-; GCN: .set use_flat_scratch.private_seg_size, 0
-; GCN: .set use_flat_scratch.uses_vcc, 0
-; GCN: .set use_flat_scratch.uses_flat_scratch, 1
-; GCN: .set use_flat_scratch.has_dyn_sized_stack, 0
-; GCN: .set use_flat_scratch.has_recursion, 0
-; GCN: .set use_flat_scratch.has_indirect_call, 0
+; GCN: .set .Luse_flat_scratch.num_vgpr, 0
+; GCN: .set .Luse_flat_scratch.num_agpr, 0
+; GCN: .set .Luse_flat_scratch.numbered_sgpr, 32
+; GCN: .set .Luse_flat_scratch.private_seg_size, 0
+; GCN: .set .Luse_flat_scratch.uses_vcc, 0
+; GCN: .set .Luse_flat_scratch.uses_flat_scratch, 1
+; GCN: .set .Luse_flat_scratch.has_dyn_sized_stack, 0
+; GCN: .set .Luse_flat_scratch.has_recursion, 0
+; GCN: .set .Luse_flat_scratch.has_indirect_call, 0
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 0
; GCN: ScratchSize: 0
@@ -75,15 +87,15 @@ define void @use_flat_scratch() #1 {
}
; GCN-LABEL: {{^}}indirect_use_flat_scratch:
-; GCN: .set indirect_use_flat_scratch.num_vgpr, max(41, use_flat_scratch.num_vgpr)
-; GCN: .set indirect_use_flat_scratch.num_agpr, max(0, use_flat_scratch.num_agpr)
-; GCN: .set indirect_use_flat_scratch.numbered_sgpr, max(34, use_flat_scratch.numbered_sgpr)
-; GCN: .set indirect_use_flat_scratch.private_seg_size, 16+(max(use_flat_scratch.private_seg_size))
-; GCN: .set indirect_use_flat_scratch.uses_vcc, or(1, use_flat_scratch.uses_vcc)
-; GCN: .set indirect_use_flat_scratch.uses_flat_scratch, or(0, use_flat_scratch.uses_flat_scratch)
-; GCN: .set indirect_use_flat_scratch.has_dyn_sized_stack, or(0, use_flat_scratch.has_dyn_sized_stack)
-; GCN: .set indirect_use_flat_scratch.has_recursion, or(0, use_flat_scratch.has_recursion)
-; GCN: .set indirect_use_flat_scratch.has_indirect_call, or(0, use_flat_scratch.has_indirect_call)
+; GCN: .set .Lindirect_use_flat_scratch.num_vgpr, max(41, .Luse_flat_scratch.num_vgpr)
+; GCN: .set .Lindirect_use_flat_scratch.num_agpr, max(0, .Luse_flat_scratch.num_agpr)
+; GCN: .set .Lindirect_use_flat_scratch.numbered_sgpr, max(34, .Luse_flat_scratch.numbered_sgpr)
+; GCN: .set .Lindirect_use_flat_scratch.private_seg_size, 16+(max(.Luse_flat_scratch.private_seg_size))
+; GCN: .set .Lindirect_use_flat_scratch.uses_vcc, or(1, .Luse_flat_scratch.uses_vcc)
+; GCN: .set .Lindirect_use_flat_scratch.uses_flat_scratch, or(0, .Luse_flat_scratch.uses_flat_scratch)
+; GCN: .set .Lindirect_use_flat_scratch.has_dyn_sized_stack, or(0, .Luse_flat_scratch.has_dyn_sized_stack)
+; GCN: .set .Lindirect_use_flat_scratch.has_recursion, or(0, .Luse_flat_scratch.has_recursion)
+; GCN: .set .Lindirect_use_flat_scratch.has_indirect_call, or(0, .Luse_flat_scratch.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
@@ -93,15 +105,15 @@ define void @indirect_use_flat_scratch() #1 {
}
; GCN-LABEL: {{^}}indirect_2level_use_flat_scratch_kernel:
-; GCN: .set indirect_2level_use_flat_scratch_kernel.num_vgpr, max(32, indirect_use_flat_scratch.num_vgpr)
-; GCN: .set indirect_2level_use_flat_scratch_kernel.num_agpr, max(0, indirect_use_flat_scratch.num_agpr)
-; GCN: .set indirect_2level_use_flat_scratch_kernel.numbered_sgpr, max(33, indirect_use_flat_scratch.numbered_sgpr)
-; GCN: .set indirect_2level_use_flat_scratch_kernel.private_seg_size, 0+(max(indirect_use_flat_scratch.private_seg_size))
-; GCN: .set indirect_2level_use_flat_scratch_kernel.uses_vcc, or(1, indirect_use_flat_scratch.uses_vcc)
-; GCN: .set indirect_2level_use_flat_scratch_kernel.uses_flat_scratch, or(1, indirect_use_flat_scratch.uses_flat_scratch)
-; GCN: .set indirect_2level_use_flat_scratch_kernel.has_dyn_sized_stack, or(0, indirect_use_flat_scratch.has_dyn_sized_stack)
-; GCN: .set indirect_2level_use_flat_scratch_kernel.has_recursion, or(0, indirect_use_flat_scratch.has_recursion)
-; GCN: .set indirect_2level_use_flat_scratch_kernel.has_indirect_call, or(0, indirect_use_flat_scratch.has_indirect_call)
+; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.num_vgpr, max(32, .Lindirect_use_flat_scratch.num_vgpr)
+; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.num_agpr, max(0, .Lindirect_use_flat_scratch.num_agpr)
+; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.numbered_sgpr, max(33, .Lindirect_use_flat_scratch.numbered_sgpr)
+; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.private_seg_size, 0+(max(.Lindirect_use_flat_scratch.private_seg_size))
+; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.uses_vcc, or(1, .Lindirect_use_flat_scratch.uses_vcc)
+; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.uses_flat_scratch, or(1, .Lindirect_use_flat_scratch.uses_flat_scratch)
+; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.has_dyn_sized_stack, or(0, .Lindirect_use_flat_scratch.has_dyn_sized_stack)
+; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.has_recursion, or(0, .Lindirect_use_flat_scratch.has_recursion)
+; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.has_indirect_call, or(0, .Lindirect_use_flat_scratch.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
@@ -111,15 +123,15 @@ define amdgpu_kernel void @indirect_2level_use_flat_scratch_kernel(ptr addrspace
}
; GCN-LABEL: {{^}}use_10_vgpr:
-; GCN: .set use_10_vgpr.num_vgpr, 10
-; GCN: .set use_10_vgpr.num_agpr, 0
-; GCN: .set use_10_vgpr.numbered_sgpr, 32
-; GCN: .set use_10_vgpr.private_seg_size, 0
-; GCN: .set use_10_vgpr.uses_vcc, 0
-; GCN: .set use_10_vgpr.uses_flat_scratch, 0
-; GCN: .set use_10_vgpr.has_dyn_sized_stack, 0
-; GCN: .set use_10_vgpr.has_recursion, 0
-; GCN: .set use_10_vgpr.has_indirect_call, 0
+; GCN: .set .Luse_10_vgpr.num_vgpr, 10
+; GCN: .set .Luse_10_vgpr.num_agpr, 0
+; GCN: .set .Luse_10_vgpr.numbered_sgpr, 32
+; GCN: .set .Luse_10_vgpr.private_seg_size, 0
+; GCN: .set .Luse_10_vgpr.uses_vcc, 0
+; GCN: .set .Luse_10_vgpr.uses_flat_scratch, 0
+; GCN: .set .Luse_10_vgpr.has_dyn_sized_stack, 0
+; GCN: .set .Luse_10_vgpr.has_recursion, 0
+; GCN: .set .Luse_10_vgpr.has_indirect_call, 0
; GCN: TotalNumSgprs: 36
; GCN: NumVgprs: 10
; GCN: ScratchSize: 0
@@ -130,15 +142,15 @@ define void @use_10_vgpr() #1 {
}
; GCN-LABEL: {{^}}indirect_use_10_vgpr:
-; GCN: .set indirect_use_10_vgpr.num_vgpr, max(41, use_10_vgpr.num_vgpr)
-; GCN: .set indirect_use_10_vgpr.num_agpr, max(0, use_10_vgpr.num_agpr)
-; GCN: .set indirect_use_10_vgpr.numbered_sgpr, max(34, use_10_vgpr.numbered_sgpr)
-; GCN: .set indirect_use_10_vgpr.private_seg_size, 16+(max(use_10_vgpr.private_seg_size))
-; GCN: .set indirect_use_10_vgpr.uses_vcc, or(1, use_10_vgpr.uses_vcc)
-; GCN: .set indirect_use_10_vgpr.uses_flat_scratch, or(0, use_10_vgpr.uses_flat_scratch)
-; GCN: .set indirect_use_10_vgpr.has_dyn_sized_stack, or(0, use_10_vgpr.has_dyn_sized_stack)
-; GCN: .set indirect_use_10_vgpr.has_recursion, or(0, use_10_vgpr.has_recursion)
-; GCN: .set indirect_use_10_vgpr.has_indirect_call, or(0, use_10_vgpr.has_indirect_call)
+; GCN: .set .Lindirect_use_10_vgpr.num_vgpr, max(41, .Luse_10_vgpr.num_vgpr)
+; GCN: .set .Lindirect_use_10_vgpr.num_agpr, max(0, .Luse_10_vgpr.num_agpr)
+; GCN: .set .Lindirect_use_10_vgpr.numbered_sgpr, max(34, .Luse_10_vgpr.numbered_sgpr)
+; GCN: .set .Lindirect_use_10_vgpr.private_seg_size, 16+(max(.Luse_10_vgpr.private_seg_size))
+; GCN: .set .Lindirect_use_10_vgpr.uses_vcc, or(1, .Luse_10_vgpr.uses_vcc)
+; GCN: .set .Lindirect_use_10_vgpr.uses_flat_scratch, or(0, .Luse_10_vgpr.uses_flat_scratch)
+; GCN: .set .Lindirect_use_10_vgpr.has_dyn_sized_stack, or(0, .Luse_10_vgpr.has_dyn_sized_stack)
+; GCN: .set .Lindirect_use_10_vgpr.has_recursion, or(0, .Luse_10_vgpr.has_recursion)
+; GCN: .set .Lindirect_use_10_vgpr.has_indirect_call, or(0, .Luse_10_vgpr.has_indirect_call)
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
@@ -148,15 +160,15 @@ define void @indirect_use_10_vgpr() #0 {
}
; GCN-LABEL: {{^}}indirect_2_level_use_10_vgpr:
-; GCN: .set indirect_2_level_use_10_vgpr.num_vgpr, max(32, indirect_use_10_vgpr.num_vgpr)
-; GCN: .set indirect_2_level_use_10_vgpr.num_agpr, max(0, indirect_use_10_vgpr.num_agpr)
-; GCN: .set indirect_2_level_use_10_vgpr.numbered_sgpr, max(33, indirect_use_10_vgpr.numbered_sgpr)
-; GCN: .set indirect_2_level_use_10_vgpr.private_seg_size, 0+(max(indirect_use_10_vgpr.private_seg_size))
-; GCN: .set indirect_2_level_use_10_vgpr.uses_vcc, or(1, indirect_use_10_vgpr.uses_vcc)
-; GCN: .set indirect_2_level_use_10_vgpr.uses_flat_scratch, or(1, indirect_use_10_vgpr.uses_flat_scratch)
-; GCN: .set indirect_2_level_use_10_vgpr.has_dyn_sized_stack, or(0, indirect_use_10_vgpr.has_dyn_sized_stack)
-; GCN: .set indirect_2_level_use_10_vgpr.has_recursion, or(0, indirect_use_10_vgpr.has_recursion)
-; GCN: .set indirect_2_level_use_10_vgpr.has_indirect_call, or(0, indirect_use_10_vgpr.has_indirect_call)
+; GCN: .set .Lindirect_2_level_use_10_vgpr.num_vgpr, max(32, .Lindirect_use_10_vgpr.num_vgpr)
+; GCN: .set .Lindirect_2_level_use_10_vgpr.num_agpr, max(0, .Lindirect_use_10_vgpr.num_agpr)
+; GCN: .set .Lindirect_2_level_use_10_vgpr.numbered_sgpr, max(33, .Lindirect_use_10_vgpr.numbered_sgpr)
+; GCN: .set .Lindirect_2_level_use_10_vgpr.private_seg_size, 0+(max(.Lindirect_use_10_vgpr.private_seg_size))
+; GCN: .set .Lindirect_2_level_use_10_vgpr.uses_vcc, or(1, .Lindirect_use_10_vgpr.uses_vcc)
+; GCN: .set .Lindirect_2_level_use_10_vgpr.uses_flat_scratch, or(1, .Lindirect_use_10_vgpr.uses_flat_scratch)
+; GCN: .set .Lindirect_2_level_use_10_vgpr.has_dyn_sized_stack, or(0, .Lindirect_use_10_vgpr.has_dyn_sized_stack)
+; GCN: .set .Lindirect_2_level_use_10_vgpr.has_recursion, or(0, .Lindirect_use_10_vgpr.has_recursion)
+; GCN: .set .Lindirect_2_level_use_10_vgpr.has_indirect_call, or(0, .Lindirect_use_10_vgpr.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
@@ -166,15 +178,15 @@ define amdgpu_kernel void @indirect_2_level_use_10_vgpr() #0 {
}
; GCN-LABEL: {{^}}use_50_vgpr:
-; GCN: .set use_50_vgpr.num_vgpr, 50
-; GCN: .set use_50_vgpr.num_agpr, 0
-; GCN: .set use_50_vgpr.numbered_sgpr, 32
-; GCN: .set use_50_vgpr.private_seg_size, 0
-; GCN: .set use_50_vgpr.uses_vcc, 0
-; GCN: .set use_50_vgpr.uses_flat_scratch, 0
-; GCN: .set use_50_vgpr.has_dyn_sized_stack, 0
-; GCN: .set use_50_vgpr.has_recursion, 0
-; GCN: .set use_50_vgpr.has_indirect_call, 0
+; GCN: .set .Luse_50_vgpr.num_vgpr, 50
+; GCN: .set .Luse_50_vgpr.num_agpr, 0
+; GCN: .set .Luse_50_vgpr.numbered_sgpr, 32
+; GCN: .set .Luse_50_vgpr.private_seg_size, 0
+; GCN: .set .Luse_50_vgpr.uses_vcc, 0
+; GCN: .set .Luse_50_vgpr.uses_flat_scratch, 0
+; GCN: .set .Luse_50_vgpr.has_dyn_sized_stack, 0
+; GCN: .set .Luse_50_vgpr.has_recursion, 0
+; GCN: .set .Luse_50_vgpr.has_indirect_call, 0
; GCN: TotalNumSgprs: 36
; GCN: NumVgprs: 50
; GCN: ScratchSize: 0
@@ -184,15 +196,15 @@ define void @use_50_vgpr() #1 {
}
; GCN-LABEL: {{^}}indirect_use_50_vgpr:
-; GCN: .set indirect_use_50_vgpr.num_vgpr, max(41, use_50_vgpr.num_vgpr)
-; GCN: .set indirect_use_50_vgpr.num_agpr, max(0, use_50_vgpr.num_agpr)
-; GCN: .set indirect_use_50_vgpr.numbered_sgpr, max(34, use_50_vgpr.numbered_sgpr)
-; GCN: .set indirect_use_50_vgpr.private_seg_size, 16+(max(use_50_vgpr.private_seg_size))
-; GCN: .set indirect_use_50_vgpr.uses_vcc, or(1, use_50_vgpr.uses_vcc)
-; GCN: .set indirect_use_50_vgpr.uses_flat_scratch, or(0, use_50_vgpr.uses_flat_scratch)
-; GCN: .set indirect_use_50_vgpr.has_dyn_sized_stack, or(0, use_50_vgpr.has_dyn_sized_stack)
-; GCN: .set indirect_use_50_vgpr.has_recursion, or(0, use_50_vgpr.has_recursion)
-; GCN: .set indirect_use_50_vgpr.has_indirect_call, or(0, use_50_vgpr.has_indirect_call)
+; GCN: .set .Lindirect_use_50_vgpr.num_vgpr, max(41, .Luse_50_vgpr.num_vgpr)
+; GCN: .set .Lindirect_use_50_vgpr.num_agpr, max(0, .Luse_50_vgpr.num_agpr)
+; GCN: .set .Lindirect_use_50_vgpr.numbered_sgpr, max(34, .Luse_50_vgpr.numbered_sgpr)
+; GCN: .set .Lindirect_use_50_vgpr.private_seg_size, 16+(max(.Luse_50_vgpr.private_seg_size))
+; GCN: .set .Lindirect_use_50_vgpr.uses_vcc, or(1, .Luse_50_vgpr.uses_vcc)
+; GCN: .set .Lindirect_use_50_vgpr.uses_flat_scratch, or(0, .Luse_50_vgpr.uses_flat_scratch)
+; GCN: .set .Lindirect_use_50_vgpr.has_dyn_sized_stack, or(0, .Luse_50_vgpr.has_dyn_sized_stack)
+; GCN: .set .Lindirect_use_50_vgpr.has_recursion, or(0, .Luse_50_vgpr.has_recursion)
+; GCN: .set .Lindirect_use_50_vgpr.has_indirect_call, or(0, .Luse_50_vgpr.has_indirect_call)
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 50
; GCN: ScratchSize: 16
@@ -202,15 +214,15 @@ define void @indirect_use_50_vgpr() #0 {
}
; GCN-LABEL: {{^}}use_80_sgpr:
-; GCN: .set use_80_sgpr.num_vgpr, 1
-; GCN: .set use_80_sgpr.num_agpr, 0
-; GCN: .set use_80_sgpr.numbered_sgpr, 80
-; GCN: .set use_80_sgpr.private_seg_size, 8
-; GCN: .set use_80_sgpr.uses_vcc, 0
-; GCN: .set use_80_sgpr.uses_flat_scratch, 0
-; GCN: .set use_80_sgpr.has_dyn_sized_stack, 0
-; GCN: .set use_80_sgpr.has_recursion, 0
-; GCN: .set use_80_sgpr.has_indirect_call, 0
+; GCN: .set .Luse_80_sgpr.num_vgpr, 1
+; GCN: .set .Luse_80_sgpr.num_agpr, 0
+; GCN: .set .Luse_80_sgpr.numbered_sgpr, 80
+; GCN: .set .Luse_80_sgpr.private_seg_size, 8
+; GCN: .set .Luse_80_sgpr.uses_vcc, 0
+; GCN: .set .Luse_80_sgpr.uses_flat_scratch, 0
+; GCN: .set .Luse_80_sgpr.has_dyn_sized_stack, 0
+; GCN: .set .Luse_80_sgpr.has_recursion, 0
+; GCN: .set .Luse_80_sgpr.has_indirect_call, 0
; GCN: TotalNumSgprs: 84
; GCN: NumVgprs: 1
; GCN: ScratchSize: 8
@@ -220,15 +232,15 @@ define void @use_80_sgpr() #1 {
}
; GCN-LABEL: {{^}}indirect_use_80_sgpr:
-; GCN: .set indirect_use_80_sgpr.num_vgpr, max(41, use_80_sgpr.num_vgpr)
-; GCN: .set indirect_use_80_sgpr.num_agpr, max(0, use_80_sgpr.num_agpr)
-; GCN: .set indirect_use_80_sgpr.numbered_sgpr, max(34, use_80_sgpr.numbered_sgpr)
-; GCN: .set indirect_use_80_sgpr.private_seg_size, 16+(max(use_80_sgpr.private_seg_size))
-; GCN: .set indirect_use_80_sgpr.uses_vcc, or(1, use_80_sgpr.uses_vcc)
-; GCN: .set indirect_use_80_sgpr.uses_flat_scratch, or(0, use_80_sgpr.uses_flat_scratch)
-; GCN: .set indirect_use_80_sgpr.has_dyn_sized_stack, or(0, use_80_sgpr.has_dyn_sized_stack)
-; GCN: .set indirect_use_80_sgpr.has_recursion, or(0, use_80_sgpr.has_recursion)
-; GCN: .set indirect_use_80_sgpr.has_indirect_call, or(0, use_80_sgpr.has_indirect_call)
+; GCN: .set .Lindirect_use_80_sgpr.num_vgpr, max(41, .Luse_80_sgpr.num_vgpr)
+; GCN: .set .Lindirect_use_80_sgpr.num_agpr, max(0, .Luse_80_sgpr.num_agpr)
+; GCN: .set .Lindirect_use_80_sgpr.numbered_sgpr, max(34, .Luse_80_sgpr.numbered_sgpr)
+; GCN: .set .Lindirect_use_80_sgpr.private_seg_size, 16+(max(.Luse_80_sgpr.private_seg_size))
+; GCN: .set .Lindirect_use_80_sgpr.uses_vcc, or(1, .Luse_80_sgpr.uses_vcc)
+; GCN: .set .Lindirect_use_80_sgpr.uses_flat_scratch, or(0, .Luse_80_sgpr.uses_flat_scratch)
+; GCN: .set .Lindirect_use_80_sgpr.has_dyn_sized_stack, or(0, .Luse_80_sgpr.has_dyn_sized_stack)
+; GCN: .set .Lindirect_use_80_sgpr.has_recursion, or(0, .Luse_80_sgpr.has_recursion)
+; GCN: .set .Lindirect_use_80_sgpr.has_indirect_call, or(0, .Luse_80_sgpr.has_indirect_call)
; GCN: TotalNumSgprs: 84
; GCN: NumVgprs: 41
; GCN: ScratchSize: 24
@@ -238,15 +250,15 @@ define void @indirect_use_80_sgpr() #1 {
}
; GCN-LABEL: {{^}}indirect_2_level_use_80_sgpr:
-; GCN: .set indirect_2_level_use_80_sgpr.num_vgpr, max(32, indirect_use_80_sgpr.num_vgpr)
-; GCN: .set indirect_2_level_use_80_sgpr.num_agpr, max(0, indirect_use_80_sgpr.num_agpr)
-; GCN: .set indirect_2_level_use_80_sgpr.numbered_sgpr, max(33, indirect_use_80_sgpr.numbered_sgpr)
-; GCN: .set indirect_2_level_use_80_sgpr.private_seg_size, 0+(max(indirect_use_80_sgpr.private_seg_size))
-; GCN: .set indirect_2_level_use_80_sgpr.uses_vcc, or(1, indirect_use_80_sgpr.uses_vcc)
-; GCN: .set indirect_2_level_use_80_sgpr.uses_flat_scratch, or(1, indirect_use_80_sgpr.uses_flat_scratch)
-; GCN: .set indirect_2_level_use_80_sgpr.has_dyn_sized_stack, or(0, indirect_use_80_sgpr.has_dyn_sized_stack)
-; GCN: .set indirect_2_level_use_80_sgpr.has_recursion, or(0, indirect_use_80_sgpr.has_recursion)
-; GCN: .set indirect_2_level_use_80_sgpr.has_indirect_call, or(0, indirect_use_80_sgpr.has_indirect_call)
+; GCN: .set .Lindirect_2_level_use_80_sgpr.num_vgpr, max(32, .Lindirect_use_80_sgpr.num_vgpr)
+; GCN: .set .Lindirect_2_level_use_80_sgpr.num_agpr, max(0, .Lindirect_use_80_sgpr.num_agpr)
+; GCN: .set .Lindirect_2_level_use_80_sgpr.numbered_sgpr, max(33, .Lindirect_use_80_sgpr.numbered_sgpr)
+; GCN: .set .Lindirect_2_level_use_80_sgpr.private_seg_size, 0+(max(.Lindirect_use_80_sgpr.private_seg_size))
+; GCN: .set .Lindirect_2_level_use_80_sgpr.uses_vcc, or(1, .Lindirect_use_80_sgpr.uses_vcc)
+; GCN: .set .Lindirect_2_level_use_80_sgpr.uses_flat_scratch, or(1, .Lindirect_use_80_sgpr.uses_flat_scratch)
+; GCN: .set .Lindirect_2_level_use_80_sgpr.has_dyn_sized_stack, or(0, .Lindirect_use_80_sgpr.has_dyn_sized_stack)
+; GCN: .set .Lindirect_2_level_use_80_sgpr.has_recursion, or(0, .Lindirect_use_80_sgpr.has_recursion)
+; GCN: .set .Lindirect_2_level_use_80_sgpr.has_indirect_call, or(0, .Lindirect_use_80_sgpr.has_indirect_call)
; GCN: TotalNumSgprs: 86
; GCN: NumVgprs: 41
; GCN: ScratchSize: 24
@@ -256,15 +268,15 @@ define amdgpu_kernel void @indirect_2_level_use_80_sgpr() #0 {
}
; GCN-LABEL: {{^}}use_stack0:
-; GCN: .set use_stack0.num_vgpr, 1
-; GCN: .set use_stack0.num_agpr, 0
-; GCN: .set use_stack0.numbered_sgpr, 33
-; GCN: .set use_stack0.private_seg_size, 2052
-; GCN: .set use_stack0.uses_vcc, 0
-; GCN: .set use_stack0.uses_flat_scratch, 0
-; GCN: .set use_stack0.has_dyn_sized_stack, 0
-; GCN: .set use_stack0.has_recursion, 0
-; GCN: .set use_stack0.has_indirect_call, 0
+; GCN: .set .Luse_stack0.num_vgpr, 1
+; GCN: .set .Luse_stack0.num_agpr, 0
+; GCN: .set .Luse_stack0.numbered_sgpr, 33
+; GCN: .set .Luse_stack0.private_seg_size, 2052
+; GCN: .set .Luse_stack0.uses_vcc, 0
+; GCN: .set .Luse_stack0.uses_flat_scratch, 0
+; GCN: .set .Luse_stack0.has_dyn_sized_stack, 0
+; GCN: .set .Luse_stack0.has_recursion, 0
+; GCN: .set .Luse_stack0.has_indirect_call, 0
; GCN: TotalNumSgprs: 37
; GCN: NumVgprs: 1
; GCN: ScratchSize: 2052
@@ -275,15 +287,15 @@ define void @use_stack0() #1 {
}
; GCN-LABEL: {{^}}use_stack1:
-; GCN: .set use_stack1.num_vgpr, 1
-; GCN: .set use_stack1.num_agpr, 0
-; GCN: .set use_stack1.numbered_sgpr, 33
-; GCN: .set use_stack1.private_seg_size, 404
-; GCN: .set use_stack1.uses_vcc, 0
-; GCN: .set use_stack1.uses_flat_scratch, 0
-; GCN: .set use_stack1.has_dyn_sized_stack, 0
-; GCN: .set use_stack1.has_recursion, 0
-; GCN: .set use_stack1.has_indirect_call, 0
+; GCN: .set .Luse_stack1.num_vgpr, 1
+; GCN: .set .Luse_stack1.num_agpr, 0
+; GCN: .set .Luse_stack1.numbered_sgpr, 33
+; GCN: .set .Luse_stack1.private_seg_size, 404
+; GCN: .set .Luse_stack1.uses_vcc, 0
+; GCN: .set .Luse_stack1.uses_flat_scratch, 0
+; GCN: .set .Luse_stack1.has_dyn_sized_stack, 0
+; GCN: .set .Luse_stack1.has_recursion, 0
+; GCN: .set .Luse_stack1.has_indirect_call, 0
; GCN: TotalNumSgprs: 37
; GCN: NumVgprs: 1
; GCN: ScratchSize: 404
@@ -294,15 +306,15 @@ define void @use_stack1() #1 {
}
; GCN-LABEL: {{^}}indirect_use_stack:
-; GCN: .set indirect_use_stack.num_vgpr, max(41, use_stack0.num_vgpr)
-; GCN: .set indirect_use_stack.num_agpr, max(0, use_stack0.num_agpr)
-; GCN: .set indirect_use_stack.numbered_sgpr, max(34, use_stack0.numbered_sgpr)
-; GCN: .set indirect_use_stack.private_seg_size, 80+(max(use_stack0.private_seg_size))
-; GCN: .set indirect_use_stack.uses_vcc, or(1, use_stack0.uses_vcc)
-; GCN: .set indirect_use_stack.uses_flat_scratch, or(0, use_stack0.uses_flat_scratch)
-; GCN: .set indirect_use_stack.has_dyn_sized_stack, or(0, use_stack0.has_dyn_sized_stack)
-; GCN: .set indirect_use_stack.has_recursion, or(0, use_stack0.has_recursion)
-; GCN: .set indirect_use_stack.has_indirect_call, or(0, use_stack0.has_indirect_call)
+; GCN: .set .Lindirect_use_stack.num_vgpr, max(41, .Luse_stack0.num_vgpr)
+; GCN: .set .Lindirect_use_stack.num_agpr, max(0, .Luse_stack0.num_agpr)
+; GCN: .set .Lindirect_use_stack.numbered_sgpr, max(34, .Luse_stack0.numbered_sgpr)
+; GCN: .set .Lindirect_use_stack.private_seg_size, 80+(max(.Luse_stack0.private_seg_size))
+; GCN: .set .Lindirect_use_stack.uses_vcc, or(1, .Luse_stack0.uses_vcc)
+; GCN: .set .Lindirect_use_stack.uses_flat_scratch, or(0, .Luse_stack0.uses_flat_scratch)
+; GCN: .set .Lindirect_use_stack.has_dyn_sized_stack, or(0, .Luse_stack0.has_dyn_sized_stack)
+; GCN: .set .Lindirect_use_stack.has_recursion, or(0, .Luse_stack0.has_recursion)
+; GCN: .set .Lindirect_use_stack.has_indirect_call, or(0, .Luse_stack0.has_indirect_call)
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2132
@@ -314,15 +326,15 @@ define void @indirect_use_stack() #1 {
}
; GCN-LABEL: {{^}}indirect_2_level_use_stack:
-; GCN: .set indirect_2_level_use_stack.num_vgpr, max(32, indirect_use_stack.num_vgpr)
-; GCN: .set indirect_2_level_use_stack.num_agpr, max(0, indirect_use_stack.num_agpr)
-; GCN: .set indirect_2_level_use_stack.numbered_sgpr, max(33, indirect_use_stack.numbered_sgpr)
-; GCN: .set indirect_2_level_use_stack.private_seg_size, 0+(max(indirect_use_stack.private_seg_size))
-; GCN: .set indirect_2_level_use_stack.uses_vcc, or(1, indirect_use_stack.uses_vcc)
-; GCN: .set indirect_2_level_use_stack.uses_flat_scratch, or(1, indirect_use_stack.uses_flat_scratch)
-; GCN: .set indirect_2_level_use_stack.has_dyn_sized_stack, or(0, indirect_use_stack.has_dyn_sized_stack)
-; GCN: .set indirect_2_level_use_stack.has_recursion, or(0, indirect_use_stack.has_recursion)
-; GCN: .set indirect_2_level_use_stack.has_indirect_call, or(0, indirect_use_stack.has_indirect_call)
+; GCN: .set .Lindirect_2_level_use_stack.num_vgpr, max(32, .Lindirect_use_stack.num_vgpr)
+; GCN: .set .Lindirect_2_level_use_stack.num_agpr, max(0, .Lindirect_use_stack.num_agpr)
+; GCN: .set .Lindirect_2_level_use_stack.numbered_sgpr, max(33, .Lindirect_use_stack.numbered_sgpr)
+; GCN: .set .Lindirect_2_level_use_stack.private_seg_size, 0+(max(.Lindirect_use_stack.private_seg_size))
+; GCN: .set .Lindirect_2_level_use_stack.uses_vcc, or(1, .Lindirect_use_stack.uses_vcc)
+; GCN: .set .Lindirect_2_level_use_stack.uses_flat_scratch, or(1, .Lindirect_use_stack.uses_flat_scratch)
+; GCN: .set .Lindirect_2_level_use_stack.has_dyn_sized_stack, or(0, .Lindirect_use_stack.has_dyn_sized_stack)
+; GCN: .set .Lindirect_2_level_use_stack.has_recursion, or(0, .Lindirect_use_stack.has_recursion)
+; GCN: .set .Lindirect_2_level_use_stack.has_indirect_call, or(0, .Lindirect_use_stack.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2132
@@ -334,15 +346,15 @@ define amdgpu_kernel void @indirect_2_level_use_stack() #0 {
; Should be maximum of callee usage
; GCN-LABEL: {{^}}multi_call_use_use_stack:
-; GCN: .set multi_call_use_use_stack.num_vgpr, max(41, use_stack0.num_vgpr, use_stack1.num_vgpr)
-; GCN: .set multi_call_use_use_stack.num_agpr, max(0, use_stack0.num_agpr, use_stack1.num_agpr)
-; GCN: .set multi_call_use_use_stack.numbered_sgpr, max(42, use_stack0.numbered_sgpr, use_stack1.numbered_sgpr)
-; GCN: .set multi_call_use_use_stack.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size))
-; GCN: .set multi_call_use_use_stack.uses_vcc, or(1, use_stack0.uses_vcc, use_stack1.uses_vcc)
-; GCN: .set multi_call_use_use_stack.uses_flat_scratch, or(1, use_stack0.uses_flat_scratch, use_stack1.uses_flat_scratch)
-; GCN: .set multi_call_use_use_stack.has_dyn_sized_stack, or(0, use_stack0.has_dyn_sized_stack, use_stack1.has_dyn_sized_stack)
-; GCN: .set multi_call_use_use_stack.has_recursion, or(0, use_stack0.has_recursion, use_stack1.has_recursion)
-; GCN: .set multi_call_use_use_stack.has_indirect_call, or(0, use_stack0.has_indirect_call, use_stack1.has_indirect_call)
+; GCN: .set .Lmulti_call_use_use_stack.num_vgpr, max(41, .Luse_stack0.num_vgpr, .Luse_stack1.num_vgpr)
+; GCN: .set .Lmulti_call_use_use_stack.num_agpr, max(0, .Luse_stack0.num_agpr, .Luse_stack1.num_agpr)
+; GCN: .set .Lmulti_call_use_use_stack.numbered_sgpr, max(42, .Luse_stack0.numbered_sgpr, .Luse_stack1.numbered_sgpr)
+; GCN: .set .Lmulti_call_use_use_stack.private_seg_size, 0+(max(.Luse_stack0.private_seg_size, .Luse_stack1.private_seg_size))
+; GCN: .set .Lmulti_call_use_use_stack.uses_vcc, or(1, .Luse_stack0.uses_vcc, .Luse_stack1.uses_vcc)
+; GCN: .set .Lmulti_call_use_use_stack.uses_flat_scratch, or(1, .Luse_stack0.uses_flat_scratch, .Luse_stack1.uses_flat_scratch)
+; GCN: .set .Lmulti_call_use_use_stack.has_dyn_sized_stack, or(0, .Luse_stack0.has_dyn_sized_stack, .Luse_stack1.has_dyn_sized_stack)
+; GCN: .set .Lmulti_call_use_use_stack.has_recursion, or(0, .Luse_stack0.has_recursion, .Luse_stack1.has_recursion)
+; GCN: .set .Lmulti_call_use_use_stack.has_indirect_call, or(0, .Luse_stack0.has_indirect_call, .Luse_stack1.has_indirect_call)
; GCN: TotalNumSgprs: 48
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2052
@@ -355,17 +367,17 @@ define amdgpu_kernel void @multi_call_use_use_stack() #0 {
declare void @external() #0
; GCN-LABEL: {{^}}multi_call_with_external:
-; GCN: .set multi_call_with_external.num_vgpr, max(41, amdgpu.max_num_vgpr)
-; GCN: .set multi_call_with_external.num_agpr, max(0, amdgpu.max_num_agpr)
-; GCN: .set multi_call_with_external.numbered_sgpr, max(42, amdgpu.max_num_sgpr)
-; GCN: .set multi_call_with_external.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size))
-; GCN: .set multi_call_with_external.uses_vcc, 1
-; GCN: .set multi_call_with_external.uses_flat_scratch, 1
-; GCN: .set multi_call_with_external.has_dyn_sized_stack, 1
-; GCN: .set multi_call_with_external.has_recursion, 0
-; GCN: .set multi_call_with_external.has_indirect_call, 1
-; GCN: TotalNumSgprs: multi_call_with_external.numbered_sgpr+6
-; GCN: NumVgprs: multi_call_with_external.num_vgpr
+; GCN: .set .Lmulti_call_with_external.num_vgpr, max(41, .Lamdgpu.max_num_vgpr)
+; GCN: .set .Lmulti_call_with_external.num_agpr, max(0, .Lamdgpu.max_num_agpr)
+; GCN: .set .Lmulti_call_with_external.numbered_sgpr, max(42, .Lamdgpu.max_num_sgpr)
+; GCN: .set .Lmulti_call_with_external.private_seg_size, 0+(max(.Luse_stack0.private_seg_size, .Luse_stack1.private_seg_size))
+; GCN: .set .Lmulti_call_with_external.uses_vcc, 1
+; GCN: .set .Lmulti_call_with_external.uses_flat_scratch, 1
+; GCN: .set .Lmulti_call_with_external.has_dyn_sized_stack, 1
+; GCN: .set .Lmulti_call_with_external.has_recursion, 0
+; GCN: .set .Lmulti_call_with_external.has_indirect_call, 1
+; GCN: TotalNumSgprs: .Lmulti_call_with_external.numbered_sgpr+6
+; GCN: NumVgprs: .Lmulti_call_with_external.num_vgpr
; GCN: ScratchSize: 2052
define amdgpu_kernel void @multi_call_with_external() #0 {
call void @use_stack0()
@@ -375,17 +387,17 @@ define amdgpu_kernel void @multi_call_with_external() #0 {
}
; GCN-LABEL: {{^}}multi_call_with_external_and_duplicates:
-; GCN: .set multi_call_with_external_and_duplicates.num_vgpr, max(41, amdgpu.max_num_vgpr)
-; GCN: .set multi_call_with_external_and_duplicates.num_agpr, max(0, amdgpu.max_num_agpr)
-; GCN: .set multi_call_with_external_and_duplicates.numbered_sgpr, max(44, amdgpu.max_num_sgpr)
-; GCN: .set multi_call_with_external_and_duplicates.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size))
-; GCN: .set multi_call_with_external_and_duplicates.uses_vcc, 1
-; GCN: .set multi_call_with_external_and_duplicates.uses_flat_scratch, 1
-; GCN: .set multi_call_with_external_and_duplicates.has_dyn_sized_stack, 1
-; GCN: .set multi_call_with_external_and_duplicates.has_recursion, 0
-; GCN: .set multi_call_with_external_and_duplicates.has_indirect_call, 1
-; GCN: TotalNumSgprs: multi_call_with_external_and_duplicates.numbered_sgpr+6
-; GCN: NumVgprs: multi_call_with_external_and_duplicates.num_vgpr
+; GCN: .set .Lmulti_call_with_external_and_duplicates.num_vgpr, max(41, .Lamdgpu.max_num_vgpr)
+; GCN: .set .Lmulti_call_with_external_and_duplicates.num_agpr, max(0, .Lamdgpu.max_num_agpr)
+; GCN: .set .Lmulti_call_with_external_and_duplicates.numbered_sgpr, max(44, .Lamdgpu.max_num_sgpr)
+; GCN: .set .Lmulti_call_with_external_and_duplicates.private_seg_size, 0+(max(.Luse_stack0.private_seg_size, .Luse_stack1.private_seg_size))
+; GCN: .set .Lmulti_call_with_external_and_duplicates.uses_vcc, 1
+; GCN: .set .Lmulti_call_with_external_and_duplicates.uses_flat_scratch, 1
+; GCN: .set .Lmulti_call_with_external_and_duplicates.has_dyn_sized_stack, 1
+; GCN: .set .Lmulti_call_with_external_and_duplicates.has_recursion, 0
+; GCN: .set .Lmulti_call_with_external_and_duplicates.has_indirect_call, 1
+; GCN: TotalNumSgprs: .Lmulti_call_with_external_and_duplicates.numbered_sgpr+6
+; GCN: NumVgprs: .Lmulti_call_with_external_and_duplicates.num_vgpr
; GCN: ScratchSize: 2052
define amdgpu_kernel void @multi_call_with_external_and_duplicates() #0 {
call void @use_stack0()
@@ -398,17 +410,17 @@ define amdgpu_kernel void @multi_call_with_external_and_duplicates() #0 {
}
; GCN-LABEL: {{^}}usage_external:
-; GCN: .set usage_external.num_vgpr, max(32, amdgpu.max_num_vgpr)
-; GCN: .set usage_external.num_agpr, max(0, amdgpu.max_num_agpr)
-; GCN: .set usage_external.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
-; GCN: .set usage_external.private_seg_size, 0
-; GCN: .set usage_external.uses_vcc, 1
-; GCN: .set usage_external.uses_flat_scratch, 1
-; GCN: .set usage_external.has_dyn_sized_stack, 1
-; GCN: .set usage_external.has_recursion, 0
-; GCN: .set usage_external.has_indirect_call, 1
-; GCN: TotalNumSgprs: usage_external.numbered_sgpr+6
-; GCN: NumVgprs: usage_external.num_vgpr
+; GCN: .set .Lusage_external.num_vgpr, max(32, .Lamdgpu.max_num_vgpr)
+; GCN: .set .Lusage_external.num_agpr, max(0, .Lamdgpu.max_num_agpr)
+; GCN: .set .Lusage_external.numbered_sgpr, max(33, .Lamdgpu.max_num_sgpr)
+; GCN: .set .Lusage_external.private_seg_size, 0
+; GCN: .set .Lusage_external.uses_vcc, 1
+; GCN: .set .Lusage_external.uses_flat_scratch, 1
+; GCN: .set .Lusage_external.has_dyn_sized_stack, 1
+; GCN: .set .Lusage_external.has_recursion, 0
+; GCN: .set .Lusage_external.has_indirect_call, 1
+; GCN: TotalNumSgprs: .Lusage_external.numbered_sgpr+6
+; GCN: NumVgprs: .Lusage_external.num_vgpr
; GCN: ScratchSize: 0
define amdgpu_kernel void @usage_external() #0 {
call void @external()
@@ -418,17 +430,17 @@ define amdgpu_kernel void @usage_external() #0 {
declare void @external_recurse() #2
; GCN-LABEL: {{^}}usage_external_recurse:
-; GCN: .set usage_external_recurse.num_vgpr, max(32, amdgpu.max_num_vgpr)
-; GCN: .set usage_external_recurse.num_agpr, max(0, amdgpu.max_num_agpr)
-; GCN: .set usage_external_recurse.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
-; GCN: .set usage_external_recurse.private_seg_size, 0
-; GCN: .set usage_external_recurse.uses_vcc, 1
-; GCN: .set usage_external_recurse.uses_flat_scratch, 1
-; GCN: .set usage_external_recurse.has_dyn_sized_stack, 1
-; GCN: .set usage_external_recurse.has_recursion, 1
-; GCN: .set usage_external_recurse.has_indirect_call, 1
-; GCN: TotalNumSgprs: usage_external_recurse.numbered_sgpr+6
-; GCN: NumVgprs: usage_external_recurse.num_vgpr
+; GCN: .set .Lusage_external_recurse.num_vgpr, max(32, .Lamdgpu.max_num_vgpr)
+; GCN: .set .Lusage_external_recurse.num_agpr, max(0, .Lamdgpu.max_num_agpr)
+; GCN: .set .Lusage_external_recurse.numbered_sgpr, max(33, .Lamdgpu.max_num_sgpr)
+; GCN: .set .Lusage_external_recurse.private_seg_size, 0
+; GCN: .set .Lusage_external_recurse.uses_vcc, 1
+; GCN: .set .Lusage_external_recurse.uses_flat_scratch, 1
+; GCN: .set .Lusage_external_recurse.has_dyn_sized_stack, 1
+; GCN: .set .Lusage_external_recurse.has_recursion, 1
+; GCN: .set .Lusage_external_recurse.has_indirect_call, 1
+; GCN: TotalNumSgprs: .Lusage_external_recurse.numbered_sgpr+6
+; GCN: NumVgprs: .Lusage_external_recurse.num_vgpr
; GCN: ScratchSize: 0
define amdgpu_kernel void @usage_external_recurse() #0 {
call void @external_recurse()
@@ -436,15 +448,15 @@ define amdgpu_kernel void @usage_external_recurse() #0 {
}
; GCN-LABEL: {{^}}direct_recursion_use_stack:
-; GCN: .set direct_recursion_use_stack.num_vgpr, 41
-; GCN: .set direct_recursion_use_stack.num_agpr, 0
-; GCN: .set direct_recursion_use_stack.numbered_sgpr, 36
-; GCN: .set direct_recursion_use_stack.private_seg_size, 2064
-; GCN: .set direct_recursion_use_stack.uses_vcc, 1
-; GCN: .set direct_recursion_use_stack.uses_flat_scratch, 0
-; GCN: .set direct_recursion_use_stack.has_dyn_sized_stack, 0
-; GCN: .set direct_recursion_use_stack.has_recursion, 1
-; GCN: .set direct_recursion_use_stack.has_indirect_call, 0
+; GCN: .set .Ldirect_recursion_use_stack.num_vgpr, 41
+; GCN: .set .Ldirect_recursion_use_stack.num_agpr, 0
+; GCN: .set .Ldirect_recursion_use_stack.numbered_sgpr, 36
+; GCN: .set .Ldirect_recursion_use_stack.private_seg_size, 2064
+; GCN: .set .Ldirect_recursion_use_stack.uses_vcc, 1
+; GCN: .set .Ldirect_recursion_use_stack.uses_flat_scratch, 0
+; GCN: .set .Ldirect_recursion_use_stack.has_dyn_sized_stack, 0
+; GCN: .set .Ldirect_recursion_use_stack.has_recursion, 1
+; GCN: .set .Ldirect_recursion_use_stack.has_indirect_call, 0
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2064
@@ -464,15 +476,15 @@ ret:
}
; GCN-LABEL: {{^}}usage_direct_recursion:
-; GCN: .set usage_direct_recursion.num_vgpr, max(32, direct_recursion_use_stack.num_vgpr)
-; GCN: .set usage_direct_recursion.num_agpr, max(0, direct_recursion_use_stack.num_agpr)
-; GCN: .set usage_direct_recursion.numbered_sgpr, max(33, direct_recursion_use_stack.numbered_sgpr)
-; GCN: .set usage_direct_recursion.private_seg_size, 0+(max(direct_recursion_use_stack.private_seg_size))
-; GCN: .set usage_direct_recursion.uses_vcc, or(1, direct_recursion_use_stack.uses_vcc)
-; GCN: .set usage_direct_recursion.uses_flat_scratch, or(1, direct_recursion_use_stack.uses_flat_scratch)
-; GCN: .set usage_direct_recursion.has_dyn_sized_stack, or(0, direct_recursion_use_stack.has_dyn_sized_stack)
-; GCN: .set usage_direct_recursion.has_recursion, or(1, direct_recursion_use_stack.has_recursion)
-; GCN: .set usage_direct_recursion.has_indirect_call, or(0, direct_recursion_use_stack.has_indirect_call)
+; GCN: .set .Lusage_direct_recursion.num_vgpr, max(32, .Ldirect_recursion_use_stack.num_vgpr)
+; GCN: .set .Lusage_direct_recursion.num_agpr, max(0, .Ldirect_recursion_use_stack.num_agpr)
+; GCN: .set .Lusage_direct_recursion.numbered_sgpr, max(33, .Ldirect_recursion_use_stack.numbered_sgpr)
+; GCN: .set .Lusage_direct_recursion.private_seg_size, 0+(max(.Ldirect_recursion_use_stack.private_seg_size))
+; GCN: .set .Lusage_direct_recursion.uses_vcc, or(1, .Ldirect_recursion_use_stack.uses_vcc)
+; GCN: .set .Lusage_direct_recursion.uses_flat_scratch, or(1, .Ldirect_recursion_use_stack.uses_flat_scratch)
+; GCN: .set .Lusage_direct_recursion.has_dyn_sized_stack, or(0, .Ldirect_recursion_use_stack.has_dyn_sized_stack)
+; GCN: .set .Lusage_direct_recursion.has_recursion, or(1, .Ldirect_recursion_use_stack.has_recursion)
+; GCN: .set .Lusage_direct_recursion.has_indirect_call, or(0, .Ldirect_recursion_use_stack.has_indirect_call)
; GCN: TotalNumSgprs: 42
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2064
@@ -483,17 +495,17 @@ define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
; Make sure there's no assert when a sgpr96 is used.
; GCN-LABEL: {{^}}count_use_sgpr96_external_call
-; GCN: .set count_use_sgpr96_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr)
-; GCN: .set count_use_sgpr96_external_call.num_agpr, max(0, amdgpu.max_num_agpr)
-; GCN: .set count_use_sgpr96_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
-; GCN: .set count_use_sgpr96_external_call.private_seg_size, 0
-; GCN: .set count_use_sgpr96_external_call.uses_vcc, 1
-; GCN: .set count_use_sgpr96_external_call.uses_flat_scratch, 1
-; GCN: .set count_use_sgpr96_external_call.has_dyn_sized_stack, 1
-; GCN: .set count_use_sgpr96_external_call.has_recursion, 0
-; GCN: .set count_use_sgpr96_external_call.has_indirect_call, 1
-; GCN: TotalNumSgprs: count_use_sgpr96_external_call.numbered_sgpr+6
-; GCN: NumVgprs: count_use_sgpr96_external_call.num_vgpr
+; GCN: .set .Lcount_use_sgpr96_external_call.num_vgpr, max(32, .Lamdgpu.max_num_vgpr)
+; GCN: .set .Lcount_use_sgpr96_external_call.num_agpr, max(0, .Lamdgpu.max_num_agpr)
+; GCN: .set .Lcount_use_sgpr96_external_call.numbered_sgpr, max(33, .Lamdgpu.max_num_sgpr)
+; GCN: .set .Lcount_use_sgpr96_external_call.private_seg_size, 0
+; GCN: .set .Lcount_use_sgpr96_external_call.uses_vcc, 1
+; GCN: .set .Lcount_use_sgpr96_external_call.uses_flat_scratch, 1
+; GCN: .set .Lcount_use_sgpr96_external_call.has_dyn_sized_stack, 1
+; GCN: .set .Lcount_use_sgpr96_external_call.has_recursion, 0
+; GCN: .set .Lcount_use_sgpr96_external_call.has_indirect_call, 1
+; GCN: TotalNumSgprs: .Lcount_use_sgpr96_external_call.numbered_sgpr+6
+; GCN: NumVgprs: .Lcount_use_sgpr96_external_call.num_vgpr
; GCN: ScratchSize: 0
define amdgpu_kernel void @count_use_sgpr96_external_call() {
entry:
@@ -504,17 +516,17 @@ entry:
; Make sure there's no assert when a sgpr160 is used.
; GCN-LABEL: {{^}}count_use_sgpr160_external_call
-; GCN: .set count_use_sgpr160_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr)
-; GCN: .set count_use_sgpr160_external_call.num_agpr, max(0, amdgpu.max_num_agpr)
-; GCN: .set count_use_sgpr160_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
-; GCN: .set count_use_sgpr160_external_call.private_seg_size, 0
-; GCN: .set count_use_sgpr160_external_call.uses_vcc, 1
-; GCN: .set count_use_sgpr160_external_call.uses_flat_scratch, 1
-; GCN: .set count_use_sgpr160_external_call.has_dyn_sized_stack, 1
-; GCN: .set count_use_sgpr160_external_call.has_recursion, 0
-; GCN: .set count_use_sgpr160_external_call.has_indirect_call, 1
-; GCN: TotalNumSgprs: count_use_sgpr160_external_call.numbered_sgpr+6
-; GCN: NumVgprs: count_use_sgpr160_external_call.num_vgpr
+; GCN: .set .Lcount_use_sgpr160_external_call.num_vgpr, max(32, .Lamdgpu.max_num_vgpr)
+; GCN: .set .Lcount_use_sgpr160_external_call.num_agpr, max(0, .Lamdgpu.max_num_agpr)
+; GCN: .set .Lcount_use_sgpr160_external_call.numbered_sgpr, max(33, .Lamdgpu.max_num_sgpr)
+; GCN: .set .Lcount_use_sgpr160_external_call.private_seg_size, 0
+; GCN: .set .Lcount_use_sgpr160_external_call.uses_vcc, 1
+; GCN: .set .Lcount_use_sgpr160_external_call.uses_flat_scratch, 1
+; GCN: .set .Lcount_use_sgpr160_external_call.has_dyn_sized_stack, 1
+; GCN: .set .Lcount_use_sgpr160_external_call.has_recursion, 0
+; GCN: .set .Lcount_use_sgpr160_external_call.has_indirect_call, 1
+; GCN: TotalNumSgprs: .Lcount_use_sgpr160_external_call.numbered_sgpr+6
+; GCN: NumVgprs: .Lcount_use_sgpr160_external_call.num_vgpr
; GCN: ScratchSize: 0
define amdgpu_kernel void @count_use_sgpr160_external_call() {
entry:
@@ -525,17 +537,17 @@ entry:
; Make sure there's no assert when a vgpr160 is used.
; GCN-LABEL: {{^}}count_use_vgpr160_external_call
-; GCN: .set count_use_vgpr160_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr)
-; GCN: .set count_use_vgpr160_external_call.num_agpr, max(0, amdgpu.max_num_agpr)
-; GCN: .set count_use_vgpr160_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
-; GCN: .set count_use_vgpr160_external_call.private_seg_size, 0
-; GCN: .set count_use_vgpr160_external_call.uses_vcc, 1
-; GCN: .set count_use_vgpr160_external_call.uses_flat_scratch, 1
-; GCN: .set count_use_vgpr160_external_call.has_dyn_sized_stack, 1
-; GCN: .set count_use_vgpr160_external_call.has_recursion, 0
-; GCN: .set count_use_vgpr160_external_call.has_indirect_call, 1
-; GCN: TotalNumSgprs: count_use_vgpr160_external_call.numbered_sgpr+6
-; GCN: NumVgprs: count_use_vgpr160_external_call.num_vgpr
+; GCN: .set .Lcount_use_vgpr160_external_call.num_vgpr, max(32, .Lamdgpu.max_num_vgpr)
+; GCN: .set .Lcount_use_vgpr160_external_call.num_agpr, max(0, .Lamdgpu.max_num_agpr)
+; GCN: .set .Lcount_use_vgpr160_external_call.numbered_sgpr, max(33, .Lamdgpu.max_num_sgpr)
+; GCN: .set .Lcount_use_vgpr160_external_call.private_seg_size, 0
+; GCN: .set .Lcount_use_vgpr160_external_call.uses_vcc, 1
+; GCN: .set .Lcount_use_vgpr160_external_call.uses_flat_scratch, 1
+; GCN: .set .Lcount_use_vgpr160_external_call.has_dyn_sized_stack, 1
+; GCN: .set .Lcount_use_vgpr160_external_call.has_recursion, 0
+; GCN: .set .Lcount_use_vgpr160_external_call.has_indirect_call, 1
+; GCN: TotalNumSgprs: .Lcount_use_vgpr160_external_call.numbered_sgpr+6
+; GCN: NumVgprs: .Lcount_use_vgpr160_external_call.num_vgpr
; GCN: ScratchSize: 0
define amdgpu_kernel void @count_use_vgpr160_external_call() {
entry:
@@ -545,9 +557,9 @@ entry:
}
; Added at the end of the .s are the module level maximums
-; GCN: .set amdgpu.max_num_vgpr, 50
-; GCN: .set amdgpu.max_num_agpr, 0
-; GCN: .set amdgpu.max_num_sgpr, 80
+; GCN: .set .Lamdgpu.max_num_vgpr, 50
+; GCN: .set .Lamdgpu.max_num_agpr, 0
+; GCN: .set .Lamdgpu.max_num_sgpr, 80
attributes #0 = { nounwind noinline norecurse }
attributes #1 = { nounwind noinline norecurse }
diff --git a/llvm/test/CodeGen/AMDGPU/recursion.ll b/llvm/test/CodeGen/AMDGPU/recursion.ll
index c0d228e1254e64..c19029275329da 100644
--- a/llvm/test/CodeGen/AMDGPU/recursion.ll
+++ b/llvm/test/CodeGen/AMDGPU/recursion.ll
@@ -3,11 +3,11 @@
; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=V5 %s
; CHECK-LABEL: {{^}}recursive:
-; CHECK: .set recursive.private_seg_size, 16+(max(16384))
+; CHECK: .set .Lrecursive.private_seg_size, 16+(max(16384))
; CHECK: ScratchSize: 16
; V5-LABEL: {{^}}recursive:
-; V5: .set recursive.has_recursion, 1
+; V5: .set .Lrecursive.has_recursion, 1
define void @recursive() {
call void @recursive()
store volatile i32 0, ptr addrspace(1) undef
@@ -15,22 +15,22 @@ define void @recursive() {
}
; CHECK-LABEL: {{^}}tail_recursive:
-; CHECK: .set tail_recursive.private_seg_size, 0
+; CHECK: .set .Ltail_recursive.private_seg_size, 0
; CHECK: ScratchSize: 0
define void @tail_recursive() {
tail call void @tail_recursive()
ret void
}
-; CHECK: .set calls_tail_recursive.private_seg_size, 0+(max(tail_recursive.private_seg_size))
+; CHECK: .set .Lcalls_tail_recursive.private_seg_size, 0+(max(.Ltail_recursive.private_seg_size))
define void @calls_tail_recursive() norecurse {
tail call void @tail_recursive()
ret void
}
; CHECK-LABEL: {{^}}tail_recursive_with_stack:
-; CHECK: .set tail_recursive_with_stack.private_seg_size, 8
-; CHECK: .set tail_recursive_with_stack.has_recursion, 1
+; CHECK: .set .Ltail_recursive_with_stack.private_seg_size, 8
+; CHECK: .set .Ltail_recursive_with_stack.has_recursion, 1
define void @tail_recursive_with_stack() {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
@@ -41,11 +41,11 @@ define void @tail_recursive_with_stack() {
; For an arbitrary recursive call, report a large number for unknown stack
; usage for code object v4 and older
; CHECK-LABEL: {{^}}calls_recursive:
-; CHECK: .set calls_recursive.private_seg_size, 0+(max(16384, recursive.private_seg_size))
+; CHECK: .set .Lcalls_recursive.private_seg_size, 0+(max(16384, .Lrecursive.private_seg_size))
;
; V5-LABEL: {{^}}calls_recursive:
-; V5: .set calls_recursive.private_seg_size, 0+(max(recursive.private_seg_size))
-; V5: .set calls_recursive.has_dyn_sized_stack, or(0, recursive.has_dyn_sized_stack)
+; V5: .set .Lcalls_recursive.private_seg_size, 0+(max(.Lrecursive.private_seg_size))
+; V5: .set .Lcalls_recursive.has_dyn_sized_stack, or(0, .Lrecursive.has_dyn_sized_stack)
define amdgpu_kernel void @calls_recursive() {
call void @recursive()
ret void
@@ -54,7 +54,7 @@ define amdgpu_kernel void @calls_recursive() {
; Make sure we do not report a huge stack size for tail recursive
; functions
; CHECK-LABEL: {{^}}kernel_indirectly_calls_tail_recursive:
-; CHECK: .set kernel_indirectly_calls_tail_recursive.private_seg_size, 0+(max(calls_tail_recursive.private_seg_size))
+; CHECK: .set .Lkernel_indirectly_calls_tail_recursive.private_seg_size, 0+(max(.Lcalls_tail_recursive.private_seg_size))
define amdgpu_kernel void @kernel_indirectly_calls_tail_recursive() {
call void @calls_tail_recursive()
ret void
@@ -65,22 +65,22 @@ define amdgpu_kernel void @kernel_indirectly_calls_tail_recursive() {
; in the kernel.
; CHECK-LABEL: {{^}}kernel_calls_tail_recursive:
-; CHECK: .set kernel_calls_tail_recursive.private_seg_size, 0+(max(16384, tail_recursive.private_seg_size))
+; CHECK: .set .Lkernel_calls_tail_recursive.private_seg_size, 0+(max(16384, .Ltail_recursive.private_seg_size))
;
; V5-LABEL: {{^}}kernel_calls_tail_recursive:
-; V5: .set kernel_calls_tail_recursive.private_seg_size, 0+(max(tail_recursive.private_seg_size))
-; V5: .set kernel_calls_tail_recursive.has_recursion, or(1, tail_recursive.has_recursion)
+; V5: .set .Lkernel_calls_tail_recursive.private_seg_size, 0+(max(.Ltail_recursive.private_seg_size))
+; V5: .set .Lkernel_calls_tail_recursive.has_recursion, or(1, .Ltail_recursive.has_recursion)
define amdgpu_kernel void @kernel_calls_tail_recursive() {
call void @tail_recursive()
ret void
}
; CHECK-LABEL: {{^}}kernel_calls_tail_recursive_with_stack:
-; CHECK: .set kernel_calls_tail_recursive_with_stack.private_seg_size, 0+(max(16384, tail_recursive_with_stack.private_seg_size))
+; CHECK: .set .Lkernel_calls_tail_recursive_with_stack.private_seg_size, 0+(max(16384, .Ltail_recursive_with_stack.private_seg_size))
;
; V5-LABEL: {{^}}kernel_calls_tail_recursive_with_stack:
-; V5: .set kernel_calls_tail_recursive_with_stack.private_seg_size, 0+(max(tail_recursive_with_stack.private_seg_size))
-; V5: .set kernel_calls_tail_recursive_with_stack.has_dyn_sized_stack, or(0, tail_recursive_with_stack.has_dyn_sized_stack)
+; V5: .set .Lkernel_calls_tail_recursive_with_stack.private_seg_size, 0+(max(.Ltail_recursive_with_stack.private_seg_size))
+; V5: .set .Lkernel_calls_tail_recursive_with_stack.has_dyn_sized_stack, or(0, .Ltail_recursive_with_stack.has_dyn_sized_stack)
define amdgpu_kernel void @kernel_calls_tail_recursive_with_stack() {
call void @tail_recursive_with_stack()
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
index 8bbae59f468f1d..849b1e2a7fce43 100644
--- a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
+++ b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
@@ -141,12 +141,12 @@ define void @empty_func() !dbg !8 {
}
; STDERR: remark: foo.cl:64:0: Function Name: test_indirect_call
-; STDERR-NEXT: remark: foo.cl:64:0: TotalSGPRs: test_indirect_call.numbered_sgpr+6
-; STDERR-NEXT: remark: foo.cl:64:0: VGPRs: test_indirect_call.num_vgpr
-; STDERR-NEXT: remark: foo.cl:64:0: AGPRs: test_indirect_call.num_agpr
+; STDERR-NEXT: remark: foo.cl:64:0: TotalSGPRs: .Ltest_indirect_call.numbered_sgpr+6
+; STDERR-NEXT: remark: foo.cl:64:0: VGPRs: .Ltest_indirect_call.num_vgpr
+; STDERR-NEXT: remark: foo.cl:64:0: AGPRs: .Ltest_indirect_call.num_agpr
; STDERR-NEXT: remark: foo.cl:64:0: ScratchSize [bytes/lane]: 0
; STDERR-NEXT: remark: foo.cl:64:0: Dynamic Stack: True
-; STDERR-NEXT: remark: foo.cl:64:0: Occupancy [waves/SIMD]: occupancy(10, 4, 256, 8, 8, max(test_indirect_call.numbered_sgpr+(extrasgprs(test_indirect_call.uses_vcc, test_indirect_call.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(test_indirect_call.num_agpr, test_indirect_call.num_vgpr), 1, 0))
+; STDERR-NEXT: remark: foo.cl:64:0: Occupancy [waves/SIMD]: occupancy(10, 4, 256, 8, 8, max(.Ltest_indirect_call.numbered_sgpr+(extrasgprs(.Ltest_indirect_call.uses_vcc, .Ltest_indirect_call.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(.Ltest_indirect_call.num_agpr, .Ltest_indirect_call.num_vgpr), 1, 0))
; STDERR-NEXT: remark: foo.cl:64:0: SGPRs Spill: 0
; STDERR-NEXT: remark: foo.cl:64:0: VGPRs Spill: 0
; STDERR-NEXT: remark: foo.cl:64:0: LDS Size [bytes/block]: 0
@@ -159,12 +159,12 @@ define amdgpu_kernel void @test_indirect_call() !dbg !9 {
}
; STDERR: remark: foo.cl:74:0: Function Name: test_indirect_w_static_stack
-; STDERR-NEXT: remark: foo.cl:74:0: TotalSGPRs: test_indirect_w_static_stack.numbered_sgpr+6
-; STDERR-NEXT: remark: foo.cl:74:0: VGPRs: test_indirect_w_static_stack.num_vgpr
-; STDERR-NEXT: remark: foo.cl:74:0: AGPRs: test_indirect_w_static_stack.num_agpr
+; STDERR-NEXT: remark: foo.cl:74:0: TotalSGPRs: .Ltest_indirect_w_static_stack.numbered_sgpr+6
+; STDERR-NEXT: remark: foo.cl:74:0: VGPRs: .Ltest_indirect_w_static_stack.num_vgpr
+; STDERR-NEXT: remark: foo.cl:74:0: AGPRs: .Ltest_indirect_w_static_stack.num_agpr
; STDERR-NEXT: remark: foo.cl:74:0: ScratchSize [bytes/lane]: 144
; STDERR-NEXT: remark: foo.cl:74:0: Dynamic Stack: True
-; STDERR-NEXT: remark: foo.cl:74:0: Occupancy [waves/SIMD]: occupancy(10, 4, 256, 8, 8, max(test_indirect_w_static_stack.numbered_sgpr+(extrasgprs(test_indirect_w_static_stack.uses_vcc, test_indirect_w_static_stack.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(test_indirect_w_static_stack.num_agpr, test_indirect_w_static_stack.num_vgpr), 1, 0))
+; STDERR-NEXT: remark: foo.cl:74:0: Occupancy [waves/SIMD]: occupancy(10, 4, 256, 8, 8, max(.Ltest_indirect_w_static_stack.numbered_sgpr+(extrasgprs(.Ltest_indirect_w_static_stack.uses_vcc, .Ltest_indirect_w_static_stack.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(.Ltest_indirect_w_static_stack.num_agpr, .Ltest_indirect_w_static_stack.num_vgpr), 1, 0))
; STDERR-NEXT: remark: foo.cl:74:0: SGPRs Spill: 0
; STDERR-NEXT: remark: foo.cl:74:0: VGPRs Spill: 0
; STDERR-NEXT: remark: foo.cl:74:0: LDS Size [bytes/block]: 0
diff --git a/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll b/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll
index 5d5aad76afd095..bd7b473fd806f2 100644
--- a/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll
+++ b/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll
@@ -23,8 +23,8 @@ define internal fastcc void @unreachable() {
; GCN-NOT: .amdhsa_uses_dynamic_stack
; GCN-V5: .amdhsa_uses_dynamic_stack
-; ALL: .set entry.private_seg_size, 0
-; ALL: .set entry.has_dyn_sized_stack, 0
+; ALL: .set .Lentry.private_seg_size, 0
+; ALL: .set .Lentry.has_dyn_sized_stack, 0
define amdgpu_kernel void @entry() {
bb0:
br i1 false, label %bb1, label %bb2
More information about the llvm-commits
mailing list