[llvm] [AMDGPU] Change scope of resource usage info symbols (PR #114810)
Janek van Oirschot via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 20 12:31:54 PST 2025
https://github.com/JanekvO updated https://github.com/llvm/llvm-project/pull/114810
From 416d3bc9c7108b87accc04c0106faa2ea9072a62 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Fri, 1 Nov 2024 18:06:35 +0000
Subject: [PATCH 1/2] [AMDGPU] Change scope of resource usage info symbols such
that they don't end up in the object file
---
.../Target/AMDGPU/AMDGPUMCResourceInfo.cpp | 17 +-
.../CodeGen/AMDGPU/agpr-register-count.ll | 34 +-
.../amdpal-metadata-agpr-register-count.ll | 6 +-
...-amdgpu-flat-work-group-size-vgpr-limit.ll | 56 +-
.../AMDGPU/call-alias-register-usage-agpr.ll | 12 +-
.../AMDGPU/call-alias-register-usage0.ll | 6 +-
.../AMDGPU/call-alias-register-usage1.ll | 10 +-
.../AMDGPU/call-alias-register-usage2.ll | 10 +-
.../AMDGPU/call-alias-register-usage3.ll | 10 +-
.../AMDGPU/call-graph-register-usage.ll | 30 +-
.../CodeGen/AMDGPU/function-resource-usage.ll | 488 +++++++++---------
llvm/test/CodeGen/AMDGPU/recursion.ll | 32 +-
.../AMDGPU/resource-optimization-remarks.ll | 16 +-
.../AMDGPU/resource-usage-dead-function.ll | 4 +-
14 files changed, 376 insertions(+), 355 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
index 9511b6bb7de062..f5dd67f57cbec6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
@@ -15,6 +15,7 @@
#include "AMDGPUMCResourceInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetMachine.h"
@@ -24,7 +25,9 @@ using namespace llvm;
MCSymbol *MCResourceInfo::getSymbol(StringRef FuncName, ResourceInfoKind RIK,
MCContext &OutContext) {
auto GOCS = [FuncName, &OutContext](StringRef Suffix) {
- return OutContext.getOrCreateSymbol(FuncName + Twine(Suffix));
+ return OutContext.getOrCreateSymbol(
+ Twine(OutContext.getAsmInfo()->getPrivateGlobalPrefix()) + FuncName +
+ Twine(Suffix));
};
switch (RIK) {
case RIK_NumVGPR:
@@ -80,15 +83,21 @@ void MCResourceInfo::finalize(MCContext &OutContext) {
}
MCSymbol *MCResourceInfo::getMaxVGPRSymbol(MCContext &OutContext) {
- return OutContext.getOrCreateSymbol("amdgpu.max_num_vgpr");
+ StringRef PrivatePrefix = OutContext.getAsmInfo()->getPrivateGlobalPrefix();
+ return OutContext.getOrCreateSymbol(Twine(PrivatePrefix) +
+ "amdgpu.max_num_vgpr");
}
MCSymbol *MCResourceInfo::getMaxAGPRSymbol(MCContext &OutContext) {
- return OutContext.getOrCreateSymbol("amdgpu.max_num_agpr");
+ StringRef PrivatePrefix = OutContext.getAsmInfo()->getPrivateGlobalPrefix();
+ return OutContext.getOrCreateSymbol(Twine(PrivatePrefix) +
+ "amdgpu.max_num_agpr");
}
MCSymbol *MCResourceInfo::getMaxSGPRSymbol(MCContext &OutContext) {
- return OutContext.getOrCreateSymbol("amdgpu.max_num_sgpr");
+ StringRef PrivatePrefix = OutContext.getAsmInfo()->getPrivateGlobalPrefix();
+ return OutContext.getOrCreateSymbol(Twine(PrivatePrefix) +
+ "amdgpu.max_num_sgpr");
}
void MCResourceInfo::assignResourceInfoExpr(
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll b/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
index 0e16ea10c019ac..b1dd4fecab2cb9 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
@@ -154,28 +154,28 @@ bb:
declare void @undef_func()
; GCN-LABEL: {{^}}kernel_call_undef_func:
-; GCN: .amdhsa_next_free_vgpr max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0)
-; GFX90A: .amdhsa_accum_offset ((((((alignto(max(1, kernel_call_undef_func.num_vgpr), 4))/4)-1)&(~65536))&63)+1)*4
-; GCN: .set kernel_call_undef_func.num_vgpr, max(32, amdgpu.max_num_vgpr)
-; GCN: .set kernel_call_undef_func.num_agpr, max(0, amdgpu.max_num_agpr)
-; GCN: NumVgprs: kernel_call_undef_func.num_vgpr
-; GCN: NumAgprs: kernel_call_undef_func.num_agpr
-; GCN: TotalNumVgprs: totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr)
-; GFX908: VGPRBlocks: ((alignto(max(max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0), 1), 4))/4)-1
-; GFX90A: VGPRBlocks: ((alignto(max(max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0), 1), 8))/8)-1
-; GCN: NumVGPRsForWavesPerEU: max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0)
-; GFX90A: AccumOffset: ((((alignto(max(1, kernel_call_undef_func.num_vgpr), 4))/4)-1)+1)*4
-; GFX908: Occupancy: occupancy(10, 4, 256, 8, 10, max(kernel_call_undef_func.numbered_sgpr+(extrasgprs(kernel_call_undef_func.uses_vcc, kernel_call_undef_func.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0))
-; GFX90A: Occupancy: occupancy(8, 8, 512, 8, 8, max(kernel_call_undef_func.numbered_sgpr+(extrasgprs(kernel_call_undef_func.uses_vcc, kernel_call_undef_func.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0))
-; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: ((((alignto(max(1, kernel_call_undef_func.num_vgpr), 4))/4)-1)&(~65536))&63
+; GCN: .amdhsa_next_free_vgpr max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0)
+; GFX90A: .amdhsa_accum_offset ((((((alignto(max(1, .Lkernel_call_undef_func.num_vgpr), 4))/4)-1)&(~65536))&63)+1)*4
+; GCN: .set .Lkernel_call_undef_func.num_vgpr, max(32, .Lamdgpu.max_num_vgpr)
+; GCN: .set .Lkernel_call_undef_func.num_agpr, max(0, .Lamdgpu.max_num_agpr)
+; GCN: NumVgprs: .Lkernel_call_undef_func.num_vgpr
+; GCN: NumAgprs: .Lkernel_call_undef_func.num_agpr
+; GCN: TotalNumVgprs: totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr)
+; GFX908: VGPRBlocks: ((alignto(max(max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0), 1), 4))/4)-1
+; GFX90A: VGPRBlocks: ((alignto(max(max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0), 1), 8))/8)-1
+; GCN: NumVGPRsForWavesPerEU: max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0)
+; GFX90A: AccumOffset: ((((alignto(max(1, .Lkernel_call_undef_func.num_vgpr), 4))/4)-1)+1)*4
+; GFX908: Occupancy: occupancy(10, 4, 256, 8, 10, max(.Lkernel_call_undef_func.numbered_sgpr+(extrasgprs(.Lkernel_call_undef_func.uses_vcc, .Lkernel_call_undef_func.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0))
+; GFX90A: Occupancy: occupancy(8, 8, 512, 8, 8, max(.Lkernel_call_undef_func.numbered_sgpr+(extrasgprs(.Lkernel_call_undef_func.uses_vcc, .Lkernel_call_undef_func.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0))
+; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: ((((alignto(max(1, .Lkernel_call_undef_func.num_vgpr), 4))/4)-1)&(~65536))&63
define amdgpu_kernel void @kernel_call_undef_func() #0 {
bb:
call void @undef_func()
ret void
}
-; GCN: .set amdgpu.max_num_vgpr, 32
-; GCN-NEXT: .set amdgpu.max_num_agpr, 32
-; GCN-NEXT: .set amdgpu.max_num_sgpr, 34
+; GCN: .set .Lamdgpu.max_num_vgpr, 32
+; GCN-NEXT: .set .Lamdgpu.max_num_agpr, 32
+; GCN-NEXT: .set .Lamdgpu.max_num_sgpr, 34
attributes #0 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll b/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll
index 8f4cb364751d88..15284ad45a9261 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll
@@ -60,9 +60,9 @@ bb:
declare void @undef_func()
; CHECK: .type kernel_call_undef_func
-; CHECK: .set kernel_call_undef_func.num_agpr, max(0, amdgpu.max_num_agpr)
-; CHECK: NumAgprs: kernel_call_undef_func.num_agpr
-; CHECK: .set amdgpu.max_num_agpr, 32
+; CHECK: .set .Lkernel_call_undef_func.num_agpr, max(0, .Lamdgpu.max_num_agpr)
+; CHECK: NumAgprs: .Lkernel_call_undef_func.num_agpr
+; CHECK: .set .Lamdgpu.max_num_agpr, 32
define amdgpu_kernel void @kernel_call_undef_func() #0 {
bb:
call void @undef_func()
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll
index d45e116beb4e3e..374fd32ec5997f 100644
--- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll
@@ -547,20 +547,20 @@ define amdgpu_kernel void @f256() #256 {
attributes #256 = { nounwind "amdgpu-flat-work-group-size"="256,256" }
; GCN-LABEL: {{^}}f512:
-; GFX9: .set f512.num_vgpr, max(128, amdgpu.max_num_vgpr)
-; GFX90A: .set f512.num_vgpr, max(128, amdgpu.max_num_vgpr)
-; GFX90A: .set f512.num_agpr, max(128, amdgpu.max_num_agpr)
-; GFX10WGP-WAVE32: .set f512.num_vgpr, max(256, amdgpu.max_num_vgpr)
-; GFX10WGP-WAVE64: .set f512.num_vgpr, max(256, amdgpu.max_num_vgpr)
-; GFX10CU-WAVE32: .set f512.num_vgpr, max(128, amdgpu.max_num_vgpr)
-; GFX10CU-WAVE64: .set f512.num_vgpr, max(128, amdgpu.max_num_vgpr)
-; GFX11WGP-WAVE32: .set f512.num_vgpr, max(256, amdgpu.max_num_vgpr)
-; GFX11WGP-WAVE64: .set f512.num_vgpr, max(256, amdgpu.max_num_vgpr)
-; GFX11CU-WAVE32: .set f512.num_vgpr, max(192, amdgpu.max_num_vgpr)
-; GFX11CU-WAVE64: .set f512.num_vgpr, max(192, amdgpu.max_num_vgpr)
-; GCN: NumVgprs: f512.num_vgpr
-; GFX90A: NumAgprs: f512.num_agpr
-; GFX90A: TotalNumVgprs: totalnumvgprs(f512.num_agpr, f512.num_vgpr)
+; GFX9: .set .Lf512.num_vgpr, max(128, .Lamdgpu.max_num_vgpr)
+; GFX90A: .set .Lf512.num_vgpr, max(128, .Lamdgpu.max_num_vgpr)
+; GFX90A: .set .Lf512.num_agpr, max(128, .Lamdgpu.max_num_agpr)
+; GFX10WGP-WAVE32: .set .Lf512.num_vgpr, max(256, .Lamdgpu.max_num_vgpr)
+; GFX10WGP-WAVE64: .set .Lf512.num_vgpr, max(256, .Lamdgpu.max_num_vgpr)
+; GFX10CU-WAVE32: .set .Lf512.num_vgpr, max(128, .Lamdgpu.max_num_vgpr)
+; GFX10CU-WAVE64: .set .Lf512.num_vgpr, max(128, .Lamdgpu.max_num_vgpr)
+; GFX11WGP-WAVE32: .set .Lf512.num_vgpr, max(256, .Lamdgpu.max_num_vgpr)
+; GFX11WGP-WAVE64: .set .Lf512.num_vgpr, max(256, .Lamdgpu.max_num_vgpr)
+; GFX11CU-WAVE32: .set .Lf512.num_vgpr, max(192, .Lamdgpu.max_num_vgpr)
+; GFX11CU-WAVE64: .set .Lf512.num_vgpr, max(192, .Lamdgpu.max_num_vgpr)
+; GCN: NumVgprs: .Lf512.num_vgpr
+; GFX90A: NumAgprs: .Lf512.num_agpr
+; GFX90A: TotalNumVgprs: totalnumvgprs(.Lf512.num_agpr, .Lf512.num_vgpr)
define amdgpu_kernel void @f512() #512 {
call void @foo()
call void @use256vgprs()
@@ -569,20 +569,20 @@ define amdgpu_kernel void @f512() #512 {
attributes #512 = { nounwind "amdgpu-flat-work-group-size"="512,512" }
; GCN-LABEL: {{^}}f1024:
-; GFX9: .set f1024.num_vgpr, max(64, amdgpu.max_num_vgpr)
-; GFX90A: .set f1024.num_vgpr, max(64, amdgpu.max_num_vgpr)
-; GFX90A: .set f1024.num_agpr, max(64, amdgpu.max_num_agpr)
-; GFX10WGP-WAVE32: .set f1024.num_vgpr, max(128, amdgpu.max_num_vgpr)
-; GFX10WGP-WAVE64: .set f1024.num_vgpr, max(128, amdgpu.max_num_vgpr)
-; GFX10CU-WAVE32: .set f1024.num_vgpr, max(64, amdgpu.max_num_vgpr)
-; GFX10CU-WAVE64: .set f1024.num_vgpr, max(64, amdgpu.max_num_vgpr)
-; GFX11WGP-WAVE32: .set f1024.num_vgpr, max(192, amdgpu.max_num_vgpr)
-; GFX11WGP-WAVE64: .set f1024.num_vgpr, max(192, amdgpu.max_num_vgpr)
-; GFX11CU-WAVE32: .set f1024.num_vgpr, max(96, amdgpu.max_num_vgpr)
-; GFX11CU-WAVE64: .set f1024.num_vgpr, max(96, amdgpu.max_num_vgpr)
-; GCN: NumVgprs: f1024.num_vgpr
-; GFX90A: NumAgprs: f1024.num_agpr
-; GFX90A: TotalNumVgprs: totalnumvgprs(f1024.num_agpr, f1024.num_vgpr)
+; GFX9: .set .Lf1024.num_vgpr, max(64, .Lamdgpu.max_num_vgpr)
+; GFX90A: .set .Lf1024.num_vgpr, max(64, .Lamdgpu.max_num_vgpr)
+; GFX90A: .set .Lf1024.num_agpr, max(64, .Lamdgpu.max_num_agpr)
+; GFX10WGP-WAVE32: .set .Lf1024.num_vgpr, max(128, .Lamdgpu.max_num_vgpr)
+; GFX10WGP-WAVE64: .set .Lf1024.num_vgpr, max(128, .Lamdgpu.max_num_vgpr)
+; GFX10CU-WAVE32: .set .Lf1024.num_vgpr, max(64, .Lamdgpu.max_num_vgpr)
+; GFX10CU-WAVE64: .set .Lf1024.num_vgpr, max(64, .Lamdgpu.max_num_vgpr)
+; GFX11WGP-WAVE32: .set .Lf1024.num_vgpr, max(192, .Lamdgpu.max_num_vgpr)
+; GFX11WGP-WAVE64: .set .Lf1024.num_vgpr, max(192, .Lamdgpu.max_num_vgpr)
+; GFX11CU-WAVE32: .set .Lf1024.num_vgpr, max(96, .Lamdgpu.max_num_vgpr)
+; GFX11CU-WAVE64: .set .Lf1024.num_vgpr, max(96, .Lamdgpu.max_num_vgpr)
+; GCN: NumVgprs: .Lf1024.num_vgpr
+; GFX90A: NumAgprs: .Lf1024.num_agpr
+; GFX90A: TotalNumVgprs: totalnumvgprs(.Lf1024.num_agpr, .Lf1024.num_vgpr)
define amdgpu_kernel void @f1024() #1024 {
call void @foo()
call void @use256vgprs()
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
index ff8a490950a11e..2b6cb53596c5db 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
@@ -8,9 +8,9 @@
@alias = hidden alias void (), ptr @aliasee_default
; ALL-LABEL: {{^}}kernel:
-; ALL: .amdhsa_next_free_vgpr max(totalnumvgprs(kernel.num_agpr, kernel.num_vgpr), 1, 0)
-; ALL-NEXT: .amdhsa_next_free_sgpr (max(kernel.numbered_sgpr+(extrasgprs(kernel.uses_vcc, kernel.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(kernel.uses_vcc, kernel.uses_flat_scratch, 1))
-; GFX90A-NEXT: .amdhsa_accum_offset ((((((alignto(max(1, kernel.num_vgpr), 4))/4)-1)&(~65536))&63)+1)*4
+; ALL: .amdhsa_next_free_vgpr max(totalnumvgprs(.Lkernel.num_agpr, .Lkernel.num_vgpr), 1, 0)
+; ALL-NEXT: .amdhsa_next_free_sgpr (max(.Lkernel.numbered_sgpr+(extrasgprs(.Lkernel.uses_vcc, .Lkernel.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(.Lkernel.uses_vcc, .Lkernel.uses_flat_scratch, 1))
+; GFX90A-NEXT: .amdhsa_accum_offset ((((((alignto(max(1, .Lkernel.num_vgpr), 4))/4)-1)&(~65536))&63)+1)*4
; ALL: .set kernel.num_vgpr, max(41, aliasee_default.num_vgpr)
; ALL-NEXT: .set kernel.num_agpr, max(0, aliasee_default.num_agpr)
@@ -26,9 +26,9 @@ bb:
call void asm sideeffect "; clobber a26 ", "~{a26}"()
ret void
}
-; ALL: .set aliasee_default.num_vgpr, 0
-; ALL-NEXT: .set aliasee_default.num_agpr, 27
-; ALL-NEXT: .set aliasee_default.numbered_sgpr, 32
+; ALL: .set .Laliasee_default.num_vgpr, 0
+; ALL-NEXT: .set .Laliasee_default.num_agpr, 27
+; ALL-NEXT: .set .Laliasee_default.numbered_sgpr, 32
attributes #0 = { noinline norecurse nounwind optnone }
attributes #1 = { noinline norecurse nounwind readnone willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll
index fdd37bb299807d..cd14dc57191723 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll
@@ -16,9 +16,9 @@ bb:
ret void
}
-; CHECK: .set aliasee_default_vgpr64_sgpr102.num_vgpr, 53
-; CHECK-NEXT: .set aliasee_default_vgpr64_sgpr102.num_agpr, 0
-; CHECK-NEXT: .set aliasee_default_vgpr64_sgpr102.numbered_sgpr, 32
+; CHECK: .set .Laliasee_default_vgpr64_sgpr102.num_vgpr, 53
+; CHECK-NEXT: .set .Laliasee_default_vgpr64_sgpr102.num_agpr, 0
+; CHECK-NEXT: .set .Laliasee_default_vgpr64_sgpr102.numbered_sgpr, 32
define internal void @aliasee_default_vgpr64_sgpr102() #1 {
bb:
call void asm sideeffect "; clobber v52 ", "~{v52}"()
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
index 3b08960d164a69..367bbafed55808 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
@@ -9,8 +9,8 @@
; The parent kernel has a higher VGPR usage than the possible callees.
; CHECK-LABEL: {{^}}kernel1:
-; CHECK: .amdhsa_next_free_vgpr max(totalnumvgprs(kernel1.num_agpr, kernel1.num_vgpr), 1, 0)
-; CHECK-NEXT: .amdhsa_next_free_sgpr (max(kernel1.numbered_sgpr+(extrasgprs(kernel1.uses_vcc, kernel1.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(kernel1.uses_vcc, kernel1.uses_flat_scratch, 1))
+; CHECK: .amdhsa_next_free_vgpr max(totalnumvgprs(.Lkernel1.num_agpr, .Lkernel1.num_vgpr), 1, 0)
+; CHECK-NEXT: .amdhsa_next_free_sgpr (max(.Lkernel1.numbered_sgpr+(extrasgprs(.Lkernel1.uses_vcc, .Lkernel1.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(.Lkernel1.uses_vcc, .Lkernel1.uses_flat_scratch, 1))
; CHECK: .set kernel1.num_vgpr, max(42, aliasee_vgpr32_sgpr76.num_vgpr)
; CHECK-NEXT: .set kernel1.num_agpr, max(0, aliasee_vgpr32_sgpr76.num_agpr)
@@ -22,9 +22,9 @@ bb:
ret void
}
-; CHECK: .set aliasee_vgpr32_sgpr76.num_vgpr, 27
-; CHECK-NEXT: .set aliasee_vgpr32_sgpr76.num_agpr, 0
-; CHECK-NEXT: .set aliasee_vgpr32_sgpr76.numbered_sgpr, 32
+; CHECK: .set .Laliasee_vgpr32_sgpr76.num_vgpr, 27
+; CHECK-NEXT: .set .Laliasee_vgpr32_sgpr76.num_agpr, 0
+; CHECK-NEXT: .set .Laliasee_vgpr32_sgpr76.numbered_sgpr, 32
define internal void @aliasee_vgpr32_sgpr76() #1 {
bb:
call void asm sideeffect "; clobber v26 ", "~{v26}"()
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll
index b044e0a7167992..c0b28661f62067 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll
@@ -7,8 +7,8 @@
@alias2 = hidden alias void (), ptr @aliasee_vgpr64_sgpr102
; CHECK-LABEL: {{^}}kernel2:
-; CHECK: .amdhsa_next_free_vgpr max(totalnumvgprs(kernel2.num_agpr, kernel2.num_vgpr), 1, 0)
-; CHECK-NEXT: .amdhsa_next_free_sgpr (max(kernel2.numbered_sgpr+(extrasgprs(kernel2.uses_vcc, kernel2.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(kernel2.uses_vcc, kernel2.uses_flat_scratch, 1))
+; CHECK: .amdhsa_next_free_vgpr max(totalnumvgprs(.Lkernel2.num_agpr, .Lkernel2.num_vgpr), 1, 0)
+; CHECK-NEXT: .amdhsa_next_free_sgpr (max(.Lkernel2.numbered_sgpr+(extrasgprs(.Lkernel2.uses_vcc, .Lkernel2.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(.Lkernel2.uses_vcc, .Lkernel2.uses_flat_scratch, 1))
; CHECK: .set kernel2.num_vgpr, max(41, aliasee_vgpr64_sgpr102.num_vgpr)
; CHECK-NEXT: .set kernel2.num_agpr, max(0, aliasee_vgpr64_sgpr102.num_agpr)
@@ -19,9 +19,9 @@ bb:
ret void
}
-; CHECK: .set aliasee_vgpr64_sgpr102.num_vgpr, 53
-; CHECK-NEXT: .set aliasee_vgpr64_sgpr102.num_agpr, 0
-; CHECK-NEXT: .set aliasee_vgpr64_sgpr102.numbered_sgpr, 32
+; CHECK: .set .Laliasee_vgpr64_sgpr102.num_vgpr, 53
+; CHECK-NEXT: .set .Laliasee_vgpr64_sgpr102.num_agpr, 0
+; CHECK-NEXT: .set .Laliasee_vgpr64_sgpr102.numbered_sgpr, 32
define internal void @aliasee_vgpr64_sgpr102() #1 {
bb:
call void asm sideeffect "; clobber v52 ", "~{v52}"()
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll
index 264cc4bd190f97..dd17b099804b67 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll
@@ -7,8 +7,8 @@
@alias3 = hidden alias void (), ptr @aliasee_vgpr256_sgpr102
; CHECK-LABEL: {{^}}kernel3:
-; CHECK: .amdhsa_next_free_vgpr max(totalnumvgprs(kernel3.num_agpr, kernel3.num_vgpr), 1, 0)
-; CHECK-NEXT: .amdhsa_next_free_sgpr (max(kernel3.numbered_sgpr+(extrasgprs(kernel3.uses_vcc, kernel3.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(kernel3.uses_vcc, kernel3.uses_flat_scratch, 1))
+; CHECK: .amdhsa_next_free_vgpr max(totalnumvgprs(.Lkernel3.num_agpr, .Lkernel3.num_vgpr), 1, 0)
+; CHECK-NEXT: .amdhsa_next_free_sgpr (max(.Lkernel3.numbered_sgpr+(extrasgprs(.Lkernel3.uses_vcc, .Lkernel3.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(.Lkernel3.uses_vcc, .Lkernel3.uses_flat_scratch, 1))
; CHECK: .set kernel3.num_vgpr, max(41, aliasee_vgpr256_sgpr102.num_vgpr)
; CHECK-NEXT: .set kernel3.num_agpr, max(0, aliasee_vgpr256_sgpr102.num_agpr)
@@ -19,9 +19,9 @@ bb:
ret void
}
-; CHECK: .set aliasee_vgpr256_sgpr102.num_vgpr, 253
-; CHECK-NEXT: .set aliasee_vgpr256_sgpr102.num_agpr, 0
-; CHECK-NEXT: .set aliasee_vgpr256_sgpr102.numbered_sgpr, 33
+; CHECK: .set .Laliasee_vgpr256_sgpr102.num_vgpr, 253
+; CHECK-NEXT: .set .Laliasee_vgpr256_sgpr102.num_agpr, 0
+; CHECK-NEXT: .set .Laliasee_vgpr256_sgpr102.numbered_sgpr, 33
define internal void @aliasee_vgpr256_sgpr102() #1 {
bb:
call void asm sideeffect "; clobber v252 ", "~{v252}"()
diff --git a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
index dbd00f09943c01..61450ab655b86a 100644
--- a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
@@ -234,11 +234,11 @@ define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
; Make sure there's no assert when a sgpr96 is used.
; GCN-LABEL: {{^}}count_use_sgpr96_external_call
; GCN: ; sgpr96 s[{{[0-9]+}}:{{[0-9]+}}]
-; GCN: .set count_use_sgpr96_external_call.num_vgpr, max(0, amdgpu.max_num_vgpr)
-; GCN: .set count_use_sgpr96_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
-; CI: TotalNumSgprs: count_use_sgpr96_external_call.numbered_sgpr+4
+; GCN: .set .Lcount_use_sgpr96_external_call.num_vgpr, max(0, .Lamdgpu.max_num_vgpr)
+; GCN: .set .Lcount_use_sgpr96_external_call.numbered_sgpr, max(33, .Lamdgpu.max_num_sgpr)
+; CI: TotalNumSgprs: .Lcount_use_sgpr96_external_call.numbered_sgpr+4
; VI-BUG: TotalNumSgprs: 96
-; GCN: NumVgprs: count_use_sgpr96_external_call.num_vgpr
+; GCN: NumVgprs: .Lcount_use_sgpr96_external_call.num_vgpr
define amdgpu_kernel void @count_use_sgpr96_external_call() {
entry:
tail call void asm sideeffect "; sgpr96 $0", "s"(<3 x i32> <i32 10, i32 11, i32 12>) #1
@@ -249,11 +249,11 @@ entry:
; Make sure there's no assert when a sgpr160 is used.
; GCN-LABEL: {{^}}count_use_sgpr160_external_call
; GCN: ; sgpr160 s[{{[0-9]+}}:{{[0-9]+}}]
-; GCN: .set count_use_sgpr160_external_call.num_vgpr, max(0, amdgpu.max_num_vgpr)
-; GCN: .set count_use_sgpr160_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
-; CI: TotalNumSgprs: count_use_sgpr160_external_call.numbered_sgpr+4
+; GCN: .set .Lcount_use_sgpr160_external_call.num_vgpr, max(0, .Lamdgpu.max_num_vgpr)
+; GCN: .set .Lcount_use_sgpr160_external_call.numbered_sgpr, max(33, .Lamdgpu.max_num_sgpr)
+; CI: TotalNumSgprs: .Lcount_use_sgpr160_external_call.numbered_sgpr+4
; VI-BUG: TotalNumSgprs: 96
-; GCN: NumVgprs: count_use_sgpr160_external_call.num_vgpr
+; GCN: NumVgprs: .Lcount_use_sgpr160_external_call.num_vgpr
define amdgpu_kernel void @count_use_sgpr160_external_call() {
entry:
tail call void asm sideeffect "; sgpr160 $0", "s"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
@@ -264,11 +264,11 @@ entry:
; Make sure there's no assert when a vgpr160 is used.
; GCN-LABEL: {{^}}count_use_vgpr160_external_call
; GCN: ; vgpr160 v[{{[0-9]+}}:{{[0-9]+}}]
-; GCN: .set count_use_vgpr160_external_call.num_vgpr, max(5, amdgpu.max_num_vgpr)
-; GCN: .set count_use_vgpr160_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
-; CI: TotalNumSgprs: count_use_vgpr160_external_call.numbered_sgpr+4
+; GCN: .set .Lcount_use_vgpr160_external_call.num_vgpr, max(5, .Lamdgpu.max_num_vgpr)
+; GCN: .set .Lcount_use_vgpr160_external_call.numbered_sgpr, max(33, .Lamdgpu.max_num_sgpr)
+; CI: TotalNumSgprs: .Lcount_use_vgpr160_external_call.numbered_sgpr+4
; VI-BUG: TotalNumSgprs: 96
-; GCN: NumVgprs: count_use_vgpr160_external_call.num_vgpr
+; GCN: NumVgprs: .Lcount_use_vgpr160_external_call.num_vgpr
define amdgpu_kernel void @count_use_vgpr160_external_call() {
entry:
tail call void asm sideeffect "; vgpr160 $0", "v"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
@@ -276,9 +276,9 @@ entry:
ret void
}
-; GCN: .set amdgpu.max_num_vgpr, 50
-; GCN: .set amdgpu.max_num_agpr, 0
-; GCN: .set amdgpu.max_num_sgpr, 80
+; GCN: .set .Lamdgpu.max_num_vgpr, 50
+; GCN: .set .Lamdgpu.max_num_agpr, 0
+; GCN: .set .Lamdgpu.max_num_sgpr, 80
; GCN-LABEL: amdhsa.kernels:
; GCN: .name: count_use_sgpr96_external_call
diff --git a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
index 22257d3eba7d63..d0143ac2981ca3 100644
--- a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
@@ -1,17 +1,29 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -filetype=obj < %s > %t
+; RUN: llvm-objdump --syms %t | FileCheck -check-prefix=OBJ %s
+
+; OBJ-NOT: num_vgpr
+; OBJ-NOT: num_agpr
+; OBJ-NOT: numbered_sgpr
+; OBJ-NOT: private_seg_size
+; OBJ-NOT: uses_vcc
+; OBJ-NOT: uses_flat_scratch
+; OBJ-NOT: has_dyn_sized_stack
+; OBJ-NOT: has_recursion
+; OBJ-NOT: has_indirect_call
; Functions that don't make calls should have constants as its resource usage as no resource information has to be propagated.
; GCN-LABEL: {{^}}use_vcc:
-; GCN: .set use_vcc.num_vgpr, 0
-; GCN: .set use_vcc.num_agpr, 0
-; GCN: .set use_vcc.numbered_sgpr, 32
-; GCN: .set use_vcc.private_seg_size, 0
-; GCN: .set use_vcc.uses_vcc, 1
-; GCN: .set use_vcc.uses_flat_scratch, 0
-; GCN: .set use_vcc.has_dyn_sized_stack, 0
-; GCN: .set use_vcc.has_recursion, 0
-; GCN: .set use_vcc.has_indirect_call, 0
+; GCN: .set .Luse_vcc.num_vgpr, 0
+; GCN: .set .Luse_vcc.num_agpr, 0
+; GCN: .set .Luse_vcc.numbered_sgpr, 32
+; GCN: .set .Luse_vcc.private_seg_size, 0
+; GCN: .set .Luse_vcc.uses_vcc, 1
+; GCN: .set .Luse_vcc.uses_flat_scratch, 0
+; GCN: .set .Luse_vcc.has_dyn_sized_stack, 0
+; GCN: .set .Luse_vcc.has_recursion, 0
+; GCN: .set .Luse_vcc.has_indirect_call, 0
; GCN: TotalNumSgprs: 36
; GCN: NumVgprs: 0
; GCN: ScratchSize: 0
@@ -21,15 +33,15 @@ define void @use_vcc() #1 {
}
; GCN-LABEL: {{^}}indirect_use_vcc:
-; GCN: .set indirect_use_vcc.num_vgpr, max(41, use_vcc.num_vgpr)
-; GCN: .set indirect_use_vcc.num_agpr, max(0, use_vcc.num_agpr)
-; GCN: .set indirect_use_vcc.numbered_sgpr, max(34, use_vcc.numbered_sgpr)
-; GCN: .set indirect_use_vcc.private_seg_size, 16+(max(use_vcc.private_seg_size))
-; GCN: .set indirect_use_vcc.uses_vcc, or(1, use_vcc.uses_vcc)
-; GCN: .set indirect_use_vcc.uses_flat_scratch, or(0, use_vcc.uses_flat_scratch)
-; GCN: .set indirect_use_vcc.has_dyn_sized_stack, or(0, use_vcc.has_dyn_sized_stack)
-; GCN: .set indirect_use_vcc.has_recursion, or(0, use_vcc.has_recursion)
-; GCN: .set indirect_use_vcc.has_indirect_call, or(0, use_vcc.has_indirect_call)
+; GCN: .set .Lindirect_use_vcc.num_vgpr, max(41, .Luse_vcc.num_vgpr)
+; GCN: .set .Lindirect_use_vcc.num_agpr, max(0, .Luse_vcc.num_agpr)
+; GCN: .set .Lindirect_use_vcc.numbered_sgpr, max(34, .Luse_vcc.numbered_sgpr)
+; GCN: .set .Lindirect_use_vcc.private_seg_size, 16+(max(.Luse_vcc.private_seg_size))
+; GCN: .set .Lindirect_use_vcc.uses_vcc, or(1, .Luse_vcc.uses_vcc)
+; GCN: .set .Lindirect_use_vcc.uses_flat_scratch, or(0, .Luse_vcc.uses_flat_scratch)
+; GCN: .set .Lindirect_use_vcc.has_dyn_sized_stack, or(0, .Luse_vcc.has_dyn_sized_stack)
+; GCN: .set .Lindirect_use_vcc.has_recursion, or(0, .Luse_vcc.has_recursion)
+; GCN: .set .Lindirect_use_vcc.has_indirect_call, or(0, .Luse_vcc.has_indirect_call)
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
@@ -39,15 +51,15 @@ define void @indirect_use_vcc() #1 {
}
; GCN-LABEL: {{^}}indirect_2level_use_vcc_kernel:
-; GCN: .set indirect_2level_use_vcc_kernel.num_vgpr, max(32, indirect_use_vcc.num_vgpr)
-; GCN: .set indirect_2level_use_vcc_kernel.num_agpr, max(0, indirect_use_vcc.num_agpr)
-; GCN: .set indirect_2level_use_vcc_kernel.numbered_sgpr, max(33, indirect_use_vcc.numbered_sgpr)
-; GCN: .set indirect_2level_use_vcc_kernel.private_seg_size, 0+(max(indirect_use_vcc.private_seg_size))
-; GCN: .set indirect_2level_use_vcc_kernel.uses_vcc, or(1, indirect_use_vcc.uses_vcc)
-; GCN: .set indirect_2level_use_vcc_kernel.uses_flat_scratch, or(1, indirect_use_vcc.uses_flat_scratch)
-; GCN: .set indirect_2level_use_vcc_kernel.has_dyn_sized_stack, or(0, indirect_use_vcc.has_dyn_sized_stack)
-; GCN: .set indirect_2level_use_vcc_kernel.has_recursion, or(0, indirect_use_vcc.has_recursion)
-; GCN: .set indirect_2level_use_vcc_kernel.has_indirect_call, or(0, indirect_use_vcc.has_indirect_call)
+; GCN: .set .Lindirect_2level_use_vcc_kernel.num_vgpr, max(32, .Lindirect_use_vcc.num_vgpr)
+; GCN: .set .Lindirect_2level_use_vcc_kernel.num_agpr, max(0, .Lindirect_use_vcc.num_agpr)
+; GCN: .set .Lindirect_2level_use_vcc_kernel.numbered_sgpr, max(33, .Lindirect_use_vcc.numbered_sgpr)
+; GCN: .set .Lindirect_2level_use_vcc_kernel.private_seg_size, 0+(max(.Lindirect_use_vcc.private_seg_size))
+; GCN: .set .Lindirect_2level_use_vcc_kernel.uses_vcc, or(1, .Lindirect_use_vcc.uses_vcc)
+; GCN: .set .Lindirect_2level_use_vcc_kernel.uses_flat_scratch, or(1, .Lindirect_use_vcc.uses_flat_scratch)
+; GCN: .set .Lindirect_2level_use_vcc_kernel.has_dyn_sized_stack, or(0, .Lindirect_use_vcc.has_dyn_sized_stack)
+; GCN: .set .Lindirect_2level_use_vcc_kernel.has_recursion, or(0, .Lindirect_use_vcc.has_recursion)
+; GCN: .set .Lindirect_2level_use_vcc_kernel.has_indirect_call, or(0, .Lindirect_use_vcc.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
@@ -57,15 +69,15 @@ define amdgpu_kernel void @indirect_2level_use_vcc_kernel(ptr addrspace(1) %out)
}
; GCN-LABEL: {{^}}use_flat_scratch:
-; GCN: .set use_flat_scratch.num_vgpr, 0
-; GCN: .set use_flat_scratch.num_agpr, 0
-; GCN: .set use_flat_scratch.numbered_sgpr, 32
-; GCN: .set use_flat_scratch.private_seg_size, 0
-; GCN: .set use_flat_scratch.uses_vcc, 0
-; GCN: .set use_flat_scratch.uses_flat_scratch, 1
-; GCN: .set use_flat_scratch.has_dyn_sized_stack, 0
-; GCN: .set use_flat_scratch.has_recursion, 0
-; GCN: .set use_flat_scratch.has_indirect_call, 0
+; GCN: .set .Luse_flat_scratch.num_vgpr, 0
+; GCN: .set .Luse_flat_scratch.num_agpr, 0
+; GCN: .set .Luse_flat_scratch.numbered_sgpr, 32
+; GCN: .set .Luse_flat_scratch.private_seg_size, 0
+; GCN: .set .Luse_flat_scratch.uses_vcc, 0
+; GCN: .set .Luse_flat_scratch.uses_flat_scratch, 1
+; GCN: .set .Luse_flat_scratch.has_dyn_sized_stack, 0
+; GCN: .set .Luse_flat_scratch.has_recursion, 0
+; GCN: .set .Luse_flat_scratch.has_indirect_call, 0
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 0
; GCN: ScratchSize: 0
@@ -75,15 +87,15 @@ define void @use_flat_scratch() #1 {
}
; GCN-LABEL: {{^}}indirect_use_flat_scratch:
-; GCN: .set indirect_use_flat_scratch.num_vgpr, max(41, use_flat_scratch.num_vgpr)
-; GCN: .set indirect_use_flat_scratch.num_agpr, max(0, use_flat_scratch.num_agpr)
-; GCN: .set indirect_use_flat_scratch.numbered_sgpr, max(34, use_flat_scratch.numbered_sgpr)
-; GCN: .set indirect_use_flat_scratch.private_seg_size, 16+(max(use_flat_scratch.private_seg_size))
-; GCN: .set indirect_use_flat_scratch.uses_vcc, or(1, use_flat_scratch.uses_vcc)
-; GCN: .set indirect_use_flat_scratch.uses_flat_scratch, or(0, use_flat_scratch.uses_flat_scratch)
-; GCN: .set indirect_use_flat_scratch.has_dyn_sized_stack, or(0, use_flat_scratch.has_dyn_sized_stack)
-; GCN: .set indirect_use_flat_scratch.has_recursion, or(0, use_flat_scratch.has_recursion)
-; GCN: .set indirect_use_flat_scratch.has_indirect_call, or(0, use_flat_scratch.has_indirect_call)
+; GCN: .set .Lindirect_use_flat_scratch.num_vgpr, max(41, .Luse_flat_scratch.num_vgpr)
+; GCN: .set .Lindirect_use_flat_scratch.num_agpr, max(0, .Luse_flat_scratch.num_agpr)
+; GCN: .set .Lindirect_use_flat_scratch.numbered_sgpr, max(34, .Luse_flat_scratch.numbered_sgpr)
+; GCN: .set .Lindirect_use_flat_scratch.private_seg_size, 16+(max(.Luse_flat_scratch.private_seg_size))
+; GCN: .set .Lindirect_use_flat_scratch.uses_vcc, or(1, .Luse_flat_scratch.uses_vcc)
+; GCN: .set .Lindirect_use_flat_scratch.uses_flat_scratch, or(0, .Luse_flat_scratch.uses_flat_scratch)
+; GCN: .set .Lindirect_use_flat_scratch.has_dyn_sized_stack, or(0, .Luse_flat_scratch.has_dyn_sized_stack)
+; GCN: .set .Lindirect_use_flat_scratch.has_recursion, or(0, .Luse_flat_scratch.has_recursion)
+; GCN: .set .Lindirect_use_flat_scratch.has_indirect_call, or(0, .Luse_flat_scratch.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
@@ -93,15 +105,15 @@ define void @indirect_use_flat_scratch() #1 {
}
; GCN-LABEL: {{^}}indirect_2level_use_flat_scratch_kernel:
-; GCN: .set indirect_2level_use_flat_scratch_kernel.num_vgpr, max(32, indirect_use_flat_scratch.num_vgpr)
-; GCN: .set indirect_2level_use_flat_scratch_kernel.num_agpr, max(0, indirect_use_flat_scratch.num_agpr)
-; GCN: .set indirect_2level_use_flat_scratch_kernel.numbered_sgpr, max(33, indirect_use_flat_scratch.numbered_sgpr)
-; GCN: .set indirect_2level_use_flat_scratch_kernel.private_seg_size, 0+(max(indirect_use_flat_scratch.private_seg_size))
-; GCN: .set indirect_2level_use_flat_scratch_kernel.uses_vcc, or(1, indirect_use_flat_scratch.uses_vcc)
-; GCN: .set indirect_2level_use_flat_scratch_kernel.uses_flat_scratch, or(1, indirect_use_flat_scratch.uses_flat_scratch)
-; GCN: .set indirect_2level_use_flat_scratch_kernel.has_dyn_sized_stack, or(0, indirect_use_flat_scratch.has_dyn_sized_stack)
-; GCN: .set indirect_2level_use_flat_scratch_kernel.has_recursion, or(0, indirect_use_flat_scratch.has_recursion)
-; GCN: .set indirect_2level_use_flat_scratch_kernel.has_indirect_call, or(0, indirect_use_flat_scratch.has_indirect_call)
+; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.num_vgpr, max(32, .Lindirect_use_flat_scratch.num_vgpr)
+; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.num_agpr, max(0, .Lindirect_use_flat_scratch.num_agpr)
+; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.numbered_sgpr, max(33, .Lindirect_use_flat_scratch.numbered_sgpr)
+; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.private_seg_size, 0+(max(.Lindirect_use_flat_scratch.private_seg_size))
+; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.uses_vcc, or(1, .Lindirect_use_flat_scratch.uses_vcc)
+; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.uses_flat_scratch, or(1, .Lindirect_use_flat_scratch.uses_flat_scratch)
+; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.has_dyn_sized_stack, or(0, .Lindirect_use_flat_scratch.has_dyn_sized_stack)
+; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.has_recursion, or(0, .Lindirect_use_flat_scratch.has_recursion)
+; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.has_indirect_call, or(0, .Lindirect_use_flat_scratch.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
@@ -111,15 +123,15 @@ define amdgpu_kernel void @indirect_2level_use_flat_scratch_kernel(ptr addrspace
}
; GCN-LABEL: {{^}}use_10_vgpr:
-; GCN: .set use_10_vgpr.num_vgpr, 10
-; GCN: .set use_10_vgpr.num_agpr, 0
-; GCN: .set use_10_vgpr.numbered_sgpr, 32
-; GCN: .set use_10_vgpr.private_seg_size, 0
-; GCN: .set use_10_vgpr.uses_vcc, 0
-; GCN: .set use_10_vgpr.uses_flat_scratch, 0
-; GCN: .set use_10_vgpr.has_dyn_sized_stack, 0
-; GCN: .set use_10_vgpr.has_recursion, 0
-; GCN: .set use_10_vgpr.has_indirect_call, 0
+; GCN: .set .Luse_10_vgpr.num_vgpr, 10
+; GCN: .set .Luse_10_vgpr.num_agpr, 0
+; GCN: .set .Luse_10_vgpr.numbered_sgpr, 32
+; GCN: .set .Luse_10_vgpr.private_seg_size, 0
+; GCN: .set .Luse_10_vgpr.uses_vcc, 0
+; GCN: .set .Luse_10_vgpr.uses_flat_scratch, 0
+; GCN: .set .Luse_10_vgpr.has_dyn_sized_stack, 0
+; GCN: .set .Luse_10_vgpr.has_recursion, 0
+; GCN: .set .Luse_10_vgpr.has_indirect_call, 0
; GCN: TotalNumSgprs: 36
; GCN: NumVgprs: 10
; GCN: ScratchSize: 0
@@ -130,15 +142,15 @@ define void @use_10_vgpr() #1 {
}
; GCN-LABEL: {{^}}indirect_use_10_vgpr:
-; GCN: .set indirect_use_10_vgpr.num_vgpr, max(41, use_10_vgpr.num_vgpr)
-; GCN: .set indirect_use_10_vgpr.num_agpr, max(0, use_10_vgpr.num_agpr)
-; GCN: .set indirect_use_10_vgpr.numbered_sgpr, max(34, use_10_vgpr.numbered_sgpr)
-; GCN: .set indirect_use_10_vgpr.private_seg_size, 16+(max(use_10_vgpr.private_seg_size))
-; GCN: .set indirect_use_10_vgpr.uses_vcc, or(1, use_10_vgpr.uses_vcc)
-; GCN: .set indirect_use_10_vgpr.uses_flat_scratch, or(0, use_10_vgpr.uses_flat_scratch)
-; GCN: .set indirect_use_10_vgpr.has_dyn_sized_stack, or(0, use_10_vgpr.has_dyn_sized_stack)
-; GCN: .set indirect_use_10_vgpr.has_recursion, or(0, use_10_vgpr.has_recursion)
-; GCN: .set indirect_use_10_vgpr.has_indirect_call, or(0, use_10_vgpr.has_indirect_call)
+; GCN: .set .Lindirect_use_10_vgpr.num_vgpr, max(41, .Luse_10_vgpr.num_vgpr)
+; GCN: .set .Lindirect_use_10_vgpr.num_agpr, max(0, .Luse_10_vgpr.num_agpr)
+; GCN: .set .Lindirect_use_10_vgpr.numbered_sgpr, max(34, .Luse_10_vgpr.numbered_sgpr)
+; GCN: .set .Lindirect_use_10_vgpr.private_seg_size, 16+(max(.Luse_10_vgpr.private_seg_size))
+; GCN: .set .Lindirect_use_10_vgpr.uses_vcc, or(1, .Luse_10_vgpr.uses_vcc)
+; GCN: .set .Lindirect_use_10_vgpr.uses_flat_scratch, or(0, .Luse_10_vgpr.uses_flat_scratch)
+; GCN: .set .Lindirect_use_10_vgpr.has_dyn_sized_stack, or(0, .Luse_10_vgpr.has_dyn_sized_stack)
+; GCN: .set .Lindirect_use_10_vgpr.has_recursion, or(0, .Luse_10_vgpr.has_recursion)
+; GCN: .set .Lindirect_use_10_vgpr.has_indirect_call, or(0, .Luse_10_vgpr.has_indirect_call)
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
@@ -148,15 +160,15 @@ define void @indirect_use_10_vgpr() #0 {
}
; GCN-LABEL: {{^}}indirect_2_level_use_10_vgpr:
-; GCN: .set indirect_2_level_use_10_vgpr.num_vgpr, max(32, indirect_use_10_vgpr.num_vgpr)
-; GCN: .set indirect_2_level_use_10_vgpr.num_agpr, max(0, indirect_use_10_vgpr.num_agpr)
-; GCN: .set indirect_2_level_use_10_vgpr.numbered_sgpr, max(33, indirect_use_10_vgpr.numbered_sgpr)
-; GCN: .set indirect_2_level_use_10_vgpr.private_seg_size, 0+(max(indirect_use_10_vgpr.private_seg_size))
-; GCN: .set indirect_2_level_use_10_vgpr.uses_vcc, or(1, indirect_use_10_vgpr.uses_vcc)
-; GCN: .set indirect_2_level_use_10_vgpr.uses_flat_scratch, or(1, indirect_use_10_vgpr.uses_flat_scratch)
-; GCN: .set indirect_2_level_use_10_vgpr.has_dyn_sized_stack, or(0, indirect_use_10_vgpr.has_dyn_sized_stack)
-; GCN: .set indirect_2_level_use_10_vgpr.has_recursion, or(0, indirect_use_10_vgpr.has_recursion)
-; GCN: .set indirect_2_level_use_10_vgpr.has_indirect_call, or(0, indirect_use_10_vgpr.has_indirect_call)
+; GCN: .set .Lindirect_2_level_use_10_vgpr.num_vgpr, max(32, .Lindirect_use_10_vgpr.num_vgpr)
+; GCN: .set .Lindirect_2_level_use_10_vgpr.num_agpr, max(0, .Lindirect_use_10_vgpr.num_agpr)
+; GCN: .set .Lindirect_2_level_use_10_vgpr.numbered_sgpr, max(33, .Lindirect_use_10_vgpr.numbered_sgpr)
+; GCN: .set .Lindirect_2_level_use_10_vgpr.private_seg_size, 0+(max(.Lindirect_use_10_vgpr.private_seg_size))
+; GCN: .set .Lindirect_2_level_use_10_vgpr.uses_vcc, or(1, .Lindirect_use_10_vgpr.uses_vcc)
+; GCN: .set .Lindirect_2_level_use_10_vgpr.uses_flat_scratch, or(1, .Lindirect_use_10_vgpr.uses_flat_scratch)
+; GCN: .set .Lindirect_2_level_use_10_vgpr.has_dyn_sized_stack, or(0, .Lindirect_use_10_vgpr.has_dyn_sized_stack)
+; GCN: .set .Lindirect_2_level_use_10_vgpr.has_recursion, or(0, .Lindirect_use_10_vgpr.has_recursion)
+; GCN: .set .Lindirect_2_level_use_10_vgpr.has_indirect_call, or(0, .Lindirect_use_10_vgpr.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
@@ -166,15 +178,15 @@ define amdgpu_kernel void @indirect_2_level_use_10_vgpr() #0 {
}
; GCN-LABEL: {{^}}use_50_vgpr:
-; GCN: .set use_50_vgpr.num_vgpr, 50
-; GCN: .set use_50_vgpr.num_agpr, 0
-; GCN: .set use_50_vgpr.numbered_sgpr, 32
-; GCN: .set use_50_vgpr.private_seg_size, 0
-; GCN: .set use_50_vgpr.uses_vcc, 0
-; GCN: .set use_50_vgpr.uses_flat_scratch, 0
-; GCN: .set use_50_vgpr.has_dyn_sized_stack, 0
-; GCN: .set use_50_vgpr.has_recursion, 0
-; GCN: .set use_50_vgpr.has_indirect_call, 0
+; GCN: .set .Luse_50_vgpr.num_vgpr, 50
+; GCN: .set .Luse_50_vgpr.num_agpr, 0
+; GCN: .set .Luse_50_vgpr.numbered_sgpr, 32
+; GCN: .set .Luse_50_vgpr.private_seg_size, 0
+; GCN: .set .Luse_50_vgpr.uses_vcc, 0
+; GCN: .set .Luse_50_vgpr.uses_flat_scratch, 0
+; GCN: .set .Luse_50_vgpr.has_dyn_sized_stack, 0
+; GCN: .set .Luse_50_vgpr.has_recursion, 0
+; GCN: .set .Luse_50_vgpr.has_indirect_call, 0
; GCN: TotalNumSgprs: 36
; GCN: NumVgprs: 50
; GCN: ScratchSize: 0
@@ -184,15 +196,15 @@ define void @use_50_vgpr() #1 {
}
; GCN-LABEL: {{^}}indirect_use_50_vgpr:
-; GCN: .set indirect_use_50_vgpr.num_vgpr, max(41, use_50_vgpr.num_vgpr)
-; GCN: .set indirect_use_50_vgpr.num_agpr, max(0, use_50_vgpr.num_agpr)
-; GCN: .set indirect_use_50_vgpr.numbered_sgpr, max(34, use_50_vgpr.numbered_sgpr)
-; GCN: .set indirect_use_50_vgpr.private_seg_size, 16+(max(use_50_vgpr.private_seg_size))
-; GCN: .set indirect_use_50_vgpr.uses_vcc, or(1, use_50_vgpr.uses_vcc)
-; GCN: .set indirect_use_50_vgpr.uses_flat_scratch, or(0, use_50_vgpr.uses_flat_scratch)
-; GCN: .set indirect_use_50_vgpr.has_dyn_sized_stack, or(0, use_50_vgpr.has_dyn_sized_stack)
-; GCN: .set indirect_use_50_vgpr.has_recursion, or(0, use_50_vgpr.has_recursion)
-; GCN: .set indirect_use_50_vgpr.has_indirect_call, or(0, use_50_vgpr.has_indirect_call)
+; GCN: .set .Lindirect_use_50_vgpr.num_vgpr, max(41, .Luse_50_vgpr.num_vgpr)
+; GCN: .set .Lindirect_use_50_vgpr.num_agpr, max(0, .Luse_50_vgpr.num_agpr)
+; GCN: .set .Lindirect_use_50_vgpr.numbered_sgpr, max(34, .Luse_50_vgpr.numbered_sgpr)
+; GCN: .set .Lindirect_use_50_vgpr.private_seg_size, 16+(max(.Luse_50_vgpr.private_seg_size))
+; GCN: .set .Lindirect_use_50_vgpr.uses_vcc, or(1, .Luse_50_vgpr.uses_vcc)
+; GCN: .set .Lindirect_use_50_vgpr.uses_flat_scratch, or(0, .Luse_50_vgpr.uses_flat_scratch)
+; GCN: .set .Lindirect_use_50_vgpr.has_dyn_sized_stack, or(0, .Luse_50_vgpr.has_dyn_sized_stack)
+; GCN: .set .Lindirect_use_50_vgpr.has_recursion, or(0, .Luse_50_vgpr.has_recursion)
+; GCN: .set .Lindirect_use_50_vgpr.has_indirect_call, or(0, .Luse_50_vgpr.has_indirect_call)
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 50
; GCN: ScratchSize: 16
@@ -202,15 +214,15 @@ define void @indirect_use_50_vgpr() #0 {
}
; GCN-LABEL: {{^}}use_80_sgpr:
-; GCN: .set use_80_sgpr.num_vgpr, 1
-; GCN: .set use_80_sgpr.num_agpr, 0
-; GCN: .set use_80_sgpr.numbered_sgpr, 80
-; GCN: .set use_80_sgpr.private_seg_size, 8
-; GCN: .set use_80_sgpr.uses_vcc, 0
-; GCN: .set use_80_sgpr.uses_flat_scratch, 0
-; GCN: .set use_80_sgpr.has_dyn_sized_stack, 0
-; GCN: .set use_80_sgpr.has_recursion, 0
-; GCN: .set use_80_sgpr.has_indirect_call, 0
+; GCN: .set .Luse_80_sgpr.num_vgpr, 1
+; GCN: .set .Luse_80_sgpr.num_agpr, 0
+; GCN: .set .Luse_80_sgpr.numbered_sgpr, 80
+; GCN: .set .Luse_80_sgpr.private_seg_size, 8
+; GCN: .set .Luse_80_sgpr.uses_vcc, 0
+; GCN: .set .Luse_80_sgpr.uses_flat_scratch, 0
+; GCN: .set .Luse_80_sgpr.has_dyn_sized_stack, 0
+; GCN: .set .Luse_80_sgpr.has_recursion, 0
+; GCN: .set .Luse_80_sgpr.has_indirect_call, 0
; GCN: TotalNumSgprs: 84
; GCN: NumVgprs: 1
; GCN: ScratchSize: 8
@@ -220,15 +232,15 @@ define void @use_80_sgpr() #1 {
}
; GCN-LABEL: {{^}}indirect_use_80_sgpr:
-; GCN: .set indirect_use_80_sgpr.num_vgpr, max(41, use_80_sgpr.num_vgpr)
-; GCN: .set indirect_use_80_sgpr.num_agpr, max(0, use_80_sgpr.num_agpr)
-; GCN: .set indirect_use_80_sgpr.numbered_sgpr, max(34, use_80_sgpr.numbered_sgpr)
-; GCN: .set indirect_use_80_sgpr.private_seg_size, 16+(max(use_80_sgpr.private_seg_size))
-; GCN: .set indirect_use_80_sgpr.uses_vcc, or(1, use_80_sgpr.uses_vcc)
-; GCN: .set indirect_use_80_sgpr.uses_flat_scratch, or(0, use_80_sgpr.uses_flat_scratch)
-; GCN: .set indirect_use_80_sgpr.has_dyn_sized_stack, or(0, use_80_sgpr.has_dyn_sized_stack)
-; GCN: .set indirect_use_80_sgpr.has_recursion, or(0, use_80_sgpr.has_recursion)
-; GCN: .set indirect_use_80_sgpr.has_indirect_call, or(0, use_80_sgpr.has_indirect_call)
+; GCN: .set .Lindirect_use_80_sgpr.num_vgpr, max(41, .Luse_80_sgpr.num_vgpr)
+; GCN: .set .Lindirect_use_80_sgpr.num_agpr, max(0, .Luse_80_sgpr.num_agpr)
+; GCN: .set .Lindirect_use_80_sgpr.numbered_sgpr, max(34, .Luse_80_sgpr.numbered_sgpr)
+; GCN: .set .Lindirect_use_80_sgpr.private_seg_size, 16+(max(.Luse_80_sgpr.private_seg_size))
+; GCN: .set .Lindirect_use_80_sgpr.uses_vcc, or(1, .Luse_80_sgpr.uses_vcc)
+; GCN: .set .Lindirect_use_80_sgpr.uses_flat_scratch, or(0, .Luse_80_sgpr.uses_flat_scratch)
+; GCN: .set .Lindirect_use_80_sgpr.has_dyn_sized_stack, or(0, .Luse_80_sgpr.has_dyn_sized_stack)
+; GCN: .set .Lindirect_use_80_sgpr.has_recursion, or(0, .Luse_80_sgpr.has_recursion)
+; GCN: .set .Lindirect_use_80_sgpr.has_indirect_call, or(0, .Luse_80_sgpr.has_indirect_call)
; GCN: TotalNumSgprs: 84
; GCN: NumVgprs: 41
; GCN: ScratchSize: 24
@@ -238,15 +250,15 @@ define void @indirect_use_80_sgpr() #1 {
}
; GCN-LABEL: {{^}}indirect_2_level_use_80_sgpr:
-; GCN: .set indirect_2_level_use_80_sgpr.num_vgpr, max(32, indirect_use_80_sgpr.num_vgpr)
-; GCN: .set indirect_2_level_use_80_sgpr.num_agpr, max(0, indirect_use_80_sgpr.num_agpr)
-; GCN: .set indirect_2_level_use_80_sgpr.numbered_sgpr, max(33, indirect_use_80_sgpr.numbered_sgpr)
-; GCN: .set indirect_2_level_use_80_sgpr.private_seg_size, 0+(max(indirect_use_80_sgpr.private_seg_size))
-; GCN: .set indirect_2_level_use_80_sgpr.uses_vcc, or(1, indirect_use_80_sgpr.uses_vcc)
-; GCN: .set indirect_2_level_use_80_sgpr.uses_flat_scratch, or(1, indirect_use_80_sgpr.uses_flat_scratch)
-; GCN: .set indirect_2_level_use_80_sgpr.has_dyn_sized_stack, or(0, indirect_use_80_sgpr.has_dyn_sized_stack)
-; GCN: .set indirect_2_level_use_80_sgpr.has_recursion, or(0, indirect_use_80_sgpr.has_recursion)
-; GCN: .set indirect_2_level_use_80_sgpr.has_indirect_call, or(0, indirect_use_80_sgpr.has_indirect_call)
+; GCN: .set .Lindirect_2_level_use_80_sgpr.num_vgpr, max(32, .Lindirect_use_80_sgpr.num_vgpr)
+; GCN: .set .Lindirect_2_level_use_80_sgpr.num_agpr, max(0, .Lindirect_use_80_sgpr.num_agpr)
+; GCN: .set .Lindirect_2_level_use_80_sgpr.numbered_sgpr, max(33, .Lindirect_use_80_sgpr.numbered_sgpr)
+; GCN: .set .Lindirect_2_level_use_80_sgpr.private_seg_size, 0+(max(.Lindirect_use_80_sgpr.private_seg_size))
+; GCN: .set .Lindirect_2_level_use_80_sgpr.uses_vcc, or(1, .Lindirect_use_80_sgpr.uses_vcc)
+; GCN: .set .Lindirect_2_level_use_80_sgpr.uses_flat_scratch, or(1, .Lindirect_use_80_sgpr.uses_flat_scratch)
+; GCN: .set .Lindirect_2_level_use_80_sgpr.has_dyn_sized_stack, or(0, .Lindirect_use_80_sgpr.has_dyn_sized_stack)
+; GCN: .set .Lindirect_2_level_use_80_sgpr.has_recursion, or(0, .Lindirect_use_80_sgpr.has_recursion)
+; GCN: .set .Lindirect_2_level_use_80_sgpr.has_indirect_call, or(0, .Lindirect_use_80_sgpr.has_indirect_call)
; GCN: TotalNumSgprs: 86
; GCN: NumVgprs: 41
; GCN: ScratchSize: 24
@@ -256,15 +268,15 @@ define amdgpu_kernel void @indirect_2_level_use_80_sgpr() #0 {
}
; GCN-LABEL: {{^}}use_stack0:
-; GCN: .set use_stack0.num_vgpr, 1
-; GCN: .set use_stack0.num_agpr, 0
-; GCN: .set use_stack0.numbered_sgpr, 33
-; GCN: .set use_stack0.private_seg_size, 2052
-; GCN: .set use_stack0.uses_vcc, 0
-; GCN: .set use_stack0.uses_flat_scratch, 0
-; GCN: .set use_stack0.has_dyn_sized_stack, 0
-; GCN: .set use_stack0.has_recursion, 0
-; GCN: .set use_stack0.has_indirect_call, 0
+; GCN: .set .Luse_stack0.num_vgpr, 1
+; GCN: .set .Luse_stack0.num_agpr, 0
+; GCN: .set .Luse_stack0.numbered_sgpr, 33
+; GCN: .set .Luse_stack0.private_seg_size, 2052
+; GCN: .set .Luse_stack0.uses_vcc, 0
+; GCN: .set .Luse_stack0.uses_flat_scratch, 0
+; GCN: .set .Luse_stack0.has_dyn_sized_stack, 0
+; GCN: .set .Luse_stack0.has_recursion, 0
+; GCN: .set .Luse_stack0.has_indirect_call, 0
; GCN: TotalNumSgprs: 37
; GCN: NumVgprs: 1
; GCN: ScratchSize: 2052
@@ -275,15 +287,15 @@ define void @use_stack0() #1 {
}
; GCN-LABEL: {{^}}use_stack1:
-; GCN: .set use_stack1.num_vgpr, 1
-; GCN: .set use_stack1.num_agpr, 0
-; GCN: .set use_stack1.numbered_sgpr, 33
-; GCN: .set use_stack1.private_seg_size, 404
-; GCN: .set use_stack1.uses_vcc, 0
-; GCN: .set use_stack1.uses_flat_scratch, 0
-; GCN: .set use_stack1.has_dyn_sized_stack, 0
-; GCN: .set use_stack1.has_recursion, 0
-; GCN: .set use_stack1.has_indirect_call, 0
+; GCN: .set .Luse_stack1.num_vgpr, 1
+; GCN: .set .Luse_stack1.num_agpr, 0
+; GCN: .set .Luse_stack1.numbered_sgpr, 33
+; GCN: .set .Luse_stack1.private_seg_size, 404
+; GCN: .set .Luse_stack1.uses_vcc, 0
+; GCN: .set .Luse_stack1.uses_flat_scratch, 0
+; GCN: .set .Luse_stack1.has_dyn_sized_stack, 0
+; GCN: .set .Luse_stack1.has_recursion, 0
+; GCN: .set .Luse_stack1.has_indirect_call, 0
; GCN: TotalNumSgprs: 37
; GCN: NumVgprs: 1
; GCN: ScratchSize: 404
@@ -294,15 +306,15 @@ define void @use_stack1() #1 {
}
; GCN-LABEL: {{^}}indirect_use_stack:
-; GCN: .set indirect_use_stack.num_vgpr, max(41, use_stack0.num_vgpr)
-; GCN: .set indirect_use_stack.num_agpr, max(0, use_stack0.num_agpr)
-; GCN: .set indirect_use_stack.numbered_sgpr, max(34, use_stack0.numbered_sgpr)
-; GCN: .set indirect_use_stack.private_seg_size, 80+(max(use_stack0.private_seg_size))
-; GCN: .set indirect_use_stack.uses_vcc, or(1, use_stack0.uses_vcc)
-; GCN: .set indirect_use_stack.uses_flat_scratch, or(0, use_stack0.uses_flat_scratch)
-; GCN: .set indirect_use_stack.has_dyn_sized_stack, or(0, use_stack0.has_dyn_sized_stack)
-; GCN: .set indirect_use_stack.has_recursion, or(0, use_stack0.has_recursion)
-; GCN: .set indirect_use_stack.has_indirect_call, or(0, use_stack0.has_indirect_call)
+; GCN: .set .Lindirect_use_stack.num_vgpr, max(41, .Luse_stack0.num_vgpr)
+; GCN: .set .Lindirect_use_stack.num_agpr, max(0, .Luse_stack0.num_agpr)
+; GCN: .set .Lindirect_use_stack.numbered_sgpr, max(34, .Luse_stack0.numbered_sgpr)
+; GCN: .set .Lindirect_use_stack.private_seg_size, 80+(max(.Luse_stack0.private_seg_size))
+; GCN: .set .Lindirect_use_stack.uses_vcc, or(1, .Luse_stack0.uses_vcc)
+; GCN: .set .Lindirect_use_stack.uses_flat_scratch, or(0, .Luse_stack0.uses_flat_scratch)
+; GCN: .set .Lindirect_use_stack.has_dyn_sized_stack, or(0, .Luse_stack0.has_dyn_sized_stack)
+; GCN: .set .Lindirect_use_stack.has_recursion, or(0, .Luse_stack0.has_recursion)
+; GCN: .set .Lindirect_use_stack.has_indirect_call, or(0, .Luse_stack0.has_indirect_call)
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2132
@@ -314,15 +326,15 @@ define void @indirect_use_stack() #1 {
}
; GCN-LABEL: {{^}}indirect_2_level_use_stack:
-; GCN: .set indirect_2_level_use_stack.num_vgpr, max(32, indirect_use_stack.num_vgpr)
-; GCN: .set indirect_2_level_use_stack.num_agpr, max(0, indirect_use_stack.num_agpr)
-; GCN: .set indirect_2_level_use_stack.numbered_sgpr, max(33, indirect_use_stack.numbered_sgpr)
-; GCN: .set indirect_2_level_use_stack.private_seg_size, 0+(max(indirect_use_stack.private_seg_size))
-; GCN: .set indirect_2_level_use_stack.uses_vcc, or(1, indirect_use_stack.uses_vcc)
-; GCN: .set indirect_2_level_use_stack.uses_flat_scratch, or(1, indirect_use_stack.uses_flat_scratch)
-; GCN: .set indirect_2_level_use_stack.has_dyn_sized_stack, or(0, indirect_use_stack.has_dyn_sized_stack)
-; GCN: .set indirect_2_level_use_stack.has_recursion, or(0, indirect_use_stack.has_recursion)
-; GCN: .set indirect_2_level_use_stack.has_indirect_call, or(0, indirect_use_stack.has_indirect_call)
+; GCN: .set .Lindirect_2_level_use_stack.num_vgpr, max(32, .Lindirect_use_stack.num_vgpr)
+; GCN: .set .Lindirect_2_level_use_stack.num_agpr, max(0, .Lindirect_use_stack.num_agpr)
+; GCN: .set .Lindirect_2_level_use_stack.numbered_sgpr, max(33, .Lindirect_use_stack.numbered_sgpr)
+; GCN: .set .Lindirect_2_level_use_stack.private_seg_size, 0+(max(.Lindirect_use_stack.private_seg_size))
+; GCN: .set .Lindirect_2_level_use_stack.uses_vcc, or(1, .Lindirect_use_stack.uses_vcc)
+; GCN: .set .Lindirect_2_level_use_stack.uses_flat_scratch, or(1, .Lindirect_use_stack.uses_flat_scratch)
+; GCN: .set .Lindirect_2_level_use_stack.has_dyn_sized_stack, or(0, .Lindirect_use_stack.has_dyn_sized_stack)
+; GCN: .set .Lindirect_2_level_use_stack.has_recursion, or(0, .Lindirect_use_stack.has_recursion)
+; GCN: .set .Lindirect_2_level_use_stack.has_indirect_call, or(0, .Lindirect_use_stack.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2132
@@ -398,17 +410,17 @@ define amdgpu_kernel void @multi_call_with_external_and_duplicates() #0 {
}
; GCN-LABEL: {{^}}usage_external:
-; GCN: .set usage_external.num_vgpr, max(32, amdgpu.max_num_vgpr)
-; GCN: .set usage_external.num_agpr, max(0, amdgpu.max_num_agpr)
-; GCN: .set usage_external.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
-; GCN: .set usage_external.private_seg_size, 0
-; GCN: .set usage_external.uses_vcc, 1
-; GCN: .set usage_external.uses_flat_scratch, 1
-; GCN: .set usage_external.has_dyn_sized_stack, 1
-; GCN: .set usage_external.has_recursion, 0
-; GCN: .set usage_external.has_indirect_call, 1
-; GCN: TotalNumSgprs: usage_external.numbered_sgpr+6
-; GCN: NumVgprs: usage_external.num_vgpr
+; GCN: .set .Lusage_external.num_vgpr, max(32, .Lamdgpu.max_num_vgpr)
+; GCN: .set .Lusage_external.num_agpr, max(0, .Lamdgpu.max_num_agpr)
+; GCN: .set .Lusage_external.numbered_sgpr, max(33, .Lamdgpu.max_num_sgpr)
+; GCN: .set .Lusage_external.private_seg_size, 0
+; GCN: .set .Lusage_external.uses_vcc, 1
+; GCN: .set .Lusage_external.uses_flat_scratch, 1
+; GCN: .set .Lusage_external.has_dyn_sized_stack, 1
+; GCN: .set .Lusage_external.has_recursion, 0
+; GCN: .set .Lusage_external.has_indirect_call, 1
+; GCN: TotalNumSgprs: .Lusage_external.numbered_sgpr+6
+; GCN: NumVgprs: .Lusage_external.num_vgpr
; GCN: ScratchSize: 0
define amdgpu_kernel void @usage_external() #0 {
call void @external()
@@ -418,17 +430,17 @@ define amdgpu_kernel void @usage_external() #0 {
declare void @external_recurse() #2
; GCN-LABEL: {{^}}usage_external_recurse:
-; GCN: .set usage_external_recurse.num_vgpr, max(32, amdgpu.max_num_vgpr)
-; GCN: .set usage_external_recurse.num_agpr, max(0, amdgpu.max_num_agpr)
-; GCN: .set usage_external_recurse.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
-; GCN: .set usage_external_recurse.private_seg_size, 0
-; GCN: .set usage_external_recurse.uses_vcc, 1
-; GCN: .set usage_external_recurse.uses_flat_scratch, 1
-; GCN: .set usage_external_recurse.has_dyn_sized_stack, 1
-; GCN: .set usage_external_recurse.has_recursion, 1
-; GCN: .set usage_external_recurse.has_indirect_call, 1
-; GCN: TotalNumSgprs: usage_external_recurse.numbered_sgpr+6
-; GCN: NumVgprs: usage_external_recurse.num_vgpr
+; GCN: .set .Lusage_external_recurse.num_vgpr, max(32, .Lamdgpu.max_num_vgpr)
+; GCN: .set .Lusage_external_recurse.num_agpr, max(0, .Lamdgpu.max_num_agpr)
+; GCN: .set .Lusage_external_recurse.numbered_sgpr, max(33, .Lamdgpu.max_num_sgpr)
+; GCN: .set .Lusage_external_recurse.private_seg_size, 0
+; GCN: .set .Lusage_external_recurse.uses_vcc, 1
+; GCN: .set .Lusage_external_recurse.uses_flat_scratch, 1
+; GCN: .set .Lusage_external_recurse.has_dyn_sized_stack, 1
+; GCN: .set .Lusage_external_recurse.has_recursion, 1
+; GCN: .set .Lusage_external_recurse.has_indirect_call, 1
+; GCN: TotalNumSgprs: .Lusage_external_recurse.numbered_sgpr+6
+; GCN: NumVgprs: .Lusage_external_recurse.num_vgpr
; GCN: ScratchSize: 0
define amdgpu_kernel void @usage_external_recurse() #0 {
call void @external_recurse()
@@ -436,15 +448,15 @@ define amdgpu_kernel void @usage_external_recurse() #0 {
}
; GCN-LABEL: {{^}}direct_recursion_use_stack:
-; GCN: .set direct_recursion_use_stack.num_vgpr, 41
-; GCN: .set direct_recursion_use_stack.num_agpr, 0
-; GCN: .set direct_recursion_use_stack.numbered_sgpr, 36
-; GCN: .set direct_recursion_use_stack.private_seg_size, 2064
-; GCN: .set direct_recursion_use_stack.uses_vcc, 1
-; GCN: .set direct_recursion_use_stack.uses_flat_scratch, 0
-; GCN: .set direct_recursion_use_stack.has_dyn_sized_stack, 0
-; GCN: .set direct_recursion_use_stack.has_recursion, 1
-; GCN: .set direct_recursion_use_stack.has_indirect_call, 0
+; GCN: .set .Ldirect_recursion_use_stack.num_vgpr, 41
+; GCN: .set .Ldirect_recursion_use_stack.num_agpr, 0
+; GCN: .set .Ldirect_recursion_use_stack.numbered_sgpr, 36
+; GCN: .set .Ldirect_recursion_use_stack.private_seg_size, 2064
+; GCN: .set .Ldirect_recursion_use_stack.uses_vcc, 1
+; GCN: .set .Ldirect_recursion_use_stack.uses_flat_scratch, 0
+; GCN: .set .Ldirect_recursion_use_stack.has_dyn_sized_stack, 0
+; GCN: .set .Ldirect_recursion_use_stack.has_recursion, 1
+; GCN: .set .Ldirect_recursion_use_stack.has_indirect_call, 0
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2064
@@ -464,15 +476,15 @@ ret:
}
; GCN-LABEL: {{^}}usage_direct_recursion:
-; GCN: .set usage_direct_recursion.num_vgpr, max(32, direct_recursion_use_stack.num_vgpr)
-; GCN: .set usage_direct_recursion.num_agpr, max(0, direct_recursion_use_stack.num_agpr)
-; GCN: .set usage_direct_recursion.numbered_sgpr, max(33, direct_recursion_use_stack.numbered_sgpr)
-; GCN: .set usage_direct_recursion.private_seg_size, 0+(max(direct_recursion_use_stack.private_seg_size))
-; GCN: .set usage_direct_recursion.uses_vcc, or(1, direct_recursion_use_stack.uses_vcc)
-; GCN: .set usage_direct_recursion.uses_flat_scratch, or(1, direct_recursion_use_stack.uses_flat_scratch)
-; GCN: .set usage_direct_recursion.has_dyn_sized_stack, or(0, direct_recursion_use_stack.has_dyn_sized_stack)
-; GCN: .set usage_direct_recursion.has_recursion, or(1, direct_recursion_use_stack.has_recursion)
-; GCN: .set usage_direct_recursion.has_indirect_call, or(0, direct_recursion_use_stack.has_indirect_call)
+; GCN: .set .Lusage_direct_recursion.num_vgpr, max(32, .Ldirect_recursion_use_stack.num_vgpr)
+; GCN: .set .Lusage_direct_recursion.num_agpr, max(0, .Ldirect_recursion_use_stack.num_agpr)
+; GCN: .set .Lusage_direct_recursion.numbered_sgpr, max(33, .Ldirect_recursion_use_stack.numbered_sgpr)
+; GCN: .set .Lusage_direct_recursion.private_seg_size, 0+(max(.Ldirect_recursion_use_stack.private_seg_size))
+; GCN: .set .Lusage_direct_recursion.uses_vcc, or(1, .Ldirect_recursion_use_stack.uses_vcc)
+; GCN: .set .Lusage_direct_recursion.uses_flat_scratch, or(1, .Ldirect_recursion_use_stack.uses_flat_scratch)
+; GCN: .set .Lusage_direct_recursion.has_dyn_sized_stack, or(0, .Ldirect_recursion_use_stack.has_dyn_sized_stack)
+; GCN: .set .Lusage_direct_recursion.has_recursion, or(1, .Ldirect_recursion_use_stack.has_recursion)
+; GCN: .set .Lusage_direct_recursion.has_indirect_call, or(0, .Ldirect_recursion_use_stack.has_indirect_call)
; GCN: TotalNumSgprs: 42
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2064
@@ -613,17 +625,17 @@ define amdgpu_kernel void @multi_call_with_multi_stage_recurse(i32 %n) #0 {
; Make sure there's no assert when a sgpr96 is used.
; GCN-LABEL: {{^}}count_use_sgpr96_external_call
-; GCN: .set count_use_sgpr96_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr)
-; GCN: .set count_use_sgpr96_external_call.num_agpr, max(0, amdgpu.max_num_agpr)
-; GCN: .set count_use_sgpr96_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
-; GCN: .set count_use_sgpr96_external_call.private_seg_size, 0
-; GCN: .set count_use_sgpr96_external_call.uses_vcc, 1
-; GCN: .set count_use_sgpr96_external_call.uses_flat_scratch, 1
-; GCN: .set count_use_sgpr96_external_call.has_dyn_sized_stack, 1
-; GCN: .set count_use_sgpr96_external_call.has_recursion, 0
-; GCN: .set count_use_sgpr96_external_call.has_indirect_call, 1
-; GCN: TotalNumSgprs: count_use_sgpr96_external_call.numbered_sgpr+6
-; GCN: NumVgprs: count_use_sgpr96_external_call.num_vgpr
+; GCN: .set .Lcount_use_sgpr96_external_call.num_vgpr, max(32, .Lamdgpu.max_num_vgpr)
+; GCN: .set .Lcount_use_sgpr96_external_call.num_agpr, max(0, .Lamdgpu.max_num_agpr)
+; GCN: .set .Lcount_use_sgpr96_external_call.numbered_sgpr, max(33, .Lamdgpu.max_num_sgpr)
+; GCN: .set .Lcount_use_sgpr96_external_call.private_seg_size, 0
+; GCN: .set .Lcount_use_sgpr96_external_call.uses_vcc, 1
+; GCN: .set .Lcount_use_sgpr96_external_call.uses_flat_scratch, 1
+; GCN: .set .Lcount_use_sgpr96_external_call.has_dyn_sized_stack, 1
+; GCN: .set .Lcount_use_sgpr96_external_call.has_recursion, 0
+; GCN: .set .Lcount_use_sgpr96_external_call.has_indirect_call, 1
+; GCN: TotalNumSgprs: .Lcount_use_sgpr96_external_call.numbered_sgpr+6
+; GCN: NumVgprs: .Lcount_use_sgpr96_external_call.num_vgpr
; GCN: ScratchSize: 0
define amdgpu_kernel void @count_use_sgpr96_external_call() {
entry:
@@ -634,17 +646,17 @@ entry:
; Make sure there's no assert when a sgpr160 is used.
; GCN-LABEL: {{^}}count_use_sgpr160_external_call
-; GCN: .set count_use_sgpr160_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr)
-; GCN: .set count_use_sgpr160_external_call.num_agpr, max(0, amdgpu.max_num_agpr)
-; GCN: .set count_use_sgpr160_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
-; GCN: .set count_use_sgpr160_external_call.private_seg_size, 0
-; GCN: .set count_use_sgpr160_external_call.uses_vcc, 1
-; GCN: .set count_use_sgpr160_external_call.uses_flat_scratch, 1
-; GCN: .set count_use_sgpr160_external_call.has_dyn_sized_stack, 1
-; GCN: .set count_use_sgpr160_external_call.has_recursion, 0
-; GCN: .set count_use_sgpr160_external_call.has_indirect_call, 1
-; GCN: TotalNumSgprs: count_use_sgpr160_external_call.numbered_sgpr+6
-; GCN: NumVgprs: count_use_sgpr160_external_call.num_vgpr
+; GCN: .set .Lcount_use_sgpr160_external_call.num_vgpr, max(32, .Lamdgpu.max_num_vgpr)
+; GCN: .set .Lcount_use_sgpr160_external_call.num_agpr, max(0, .Lamdgpu.max_num_agpr)
+; GCN: .set .Lcount_use_sgpr160_external_call.numbered_sgpr, max(33, .Lamdgpu.max_num_sgpr)
+; GCN: .set .Lcount_use_sgpr160_external_call.private_seg_size, 0
+; GCN: .set .Lcount_use_sgpr160_external_call.uses_vcc, 1
+; GCN: .set .Lcount_use_sgpr160_external_call.uses_flat_scratch, 1
+; GCN: .set .Lcount_use_sgpr160_external_call.has_dyn_sized_stack, 1
+; GCN: .set .Lcount_use_sgpr160_external_call.has_recursion, 0
+; GCN: .set .Lcount_use_sgpr160_external_call.has_indirect_call, 1
+; GCN: TotalNumSgprs: .Lcount_use_sgpr160_external_call.numbered_sgpr+6
+; GCN: NumVgprs: .Lcount_use_sgpr160_external_call.num_vgpr
; GCN: ScratchSize: 0
define amdgpu_kernel void @count_use_sgpr160_external_call() {
entry:
@@ -655,17 +667,17 @@ entry:
; Make sure there's no assert when a vgpr160 is used.
; GCN-LABEL: {{^}}count_use_vgpr160_external_call
-; GCN: .set count_use_vgpr160_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr)
-; GCN: .set count_use_vgpr160_external_call.num_agpr, max(0, amdgpu.max_num_agpr)
-; GCN: .set count_use_vgpr160_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
-; GCN: .set count_use_vgpr160_external_call.private_seg_size, 0
-; GCN: .set count_use_vgpr160_external_call.uses_vcc, 1
-; GCN: .set count_use_vgpr160_external_call.uses_flat_scratch, 1
-; GCN: .set count_use_vgpr160_external_call.has_dyn_sized_stack, 1
-; GCN: .set count_use_vgpr160_external_call.has_recursion, 0
-; GCN: .set count_use_vgpr160_external_call.has_indirect_call, 1
-; GCN: TotalNumSgprs: count_use_vgpr160_external_call.numbered_sgpr+6
-; GCN: NumVgprs: count_use_vgpr160_external_call.num_vgpr
+; GCN: .set .Lcount_use_vgpr160_external_call.num_vgpr, max(32, .Lamdgpu.max_num_vgpr)
+; GCN: .set .Lcount_use_vgpr160_external_call.num_agpr, max(0, .Lamdgpu.max_num_agpr)
+; GCN: .set .Lcount_use_vgpr160_external_call.numbered_sgpr, max(33, .Lamdgpu.max_num_sgpr)
+; GCN: .set .Lcount_use_vgpr160_external_call.private_seg_size, 0
+; GCN: .set .Lcount_use_vgpr160_external_call.uses_vcc, 1
+; GCN: .set .Lcount_use_vgpr160_external_call.uses_flat_scratch, 1
+; GCN: .set .Lcount_use_vgpr160_external_call.has_dyn_sized_stack, 1
+; GCN: .set .Lcount_use_vgpr160_external_call.has_recursion, 0
+; GCN: .set .Lcount_use_vgpr160_external_call.has_indirect_call, 1
+; GCN: TotalNumSgprs: .Lcount_use_vgpr160_external_call.numbered_sgpr+6
+; GCN: NumVgprs: .Lcount_use_vgpr160_external_call.num_vgpr
; GCN: ScratchSize: 0
define amdgpu_kernel void @count_use_vgpr160_external_call() {
entry:
@@ -675,9 +687,9 @@ entry:
}
; Added at the of the .s are the module level maximums
-; GCN: .set amdgpu.max_num_vgpr, 50
-; GCN: .set amdgpu.max_num_agpr, 0
-; GCN: .set amdgpu.max_num_sgpr, 80
+; GCN: .set .Lamdgpu.max_num_vgpr, 50
+; GCN: .set .Lamdgpu.max_num_agpr, 0
+; GCN: .set .Lamdgpu.max_num_sgpr, 80
attributes #0 = { nounwind noinline norecurse }
attributes #1 = { nounwind noinline norecurse }
diff --git a/llvm/test/CodeGen/AMDGPU/recursion.ll b/llvm/test/CodeGen/AMDGPU/recursion.ll
index c0d228e1254e64..c19029275329da 100644
--- a/llvm/test/CodeGen/AMDGPU/recursion.ll
+++ b/llvm/test/CodeGen/AMDGPU/recursion.ll
@@ -3,11 +3,11 @@
; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=V5 %s
; CHECK-LABEL: {{^}}recursive:
-; CHECK: .set recursive.private_seg_size, 16+(max(16384))
+; CHECK: .set .Lrecursive.private_seg_size, 16+(max(16384))
; CHECK: ScratchSize: 16
; V5-LABEL: {{^}}recursive:
-; V5: .set recursive.has_recursion, 1
+; V5: .set .Lrecursive.has_recursion, 1
define void @recursive() {
call void @recursive()
store volatile i32 0, ptr addrspace(1) undef
@@ -15,22 +15,22 @@ define void @recursive() {
}
; CHECK-LABEL: {{^}}tail_recursive:
-; CHECK: .set tail_recursive.private_seg_size, 0
+; CHECK: .set .Ltail_recursive.private_seg_size, 0
; CHECK: ScratchSize: 0
define void @tail_recursive() {
tail call void @tail_recursive()
ret void
}
-; CHECK: .set calls_tail_recursive.private_seg_size, 0+(max(tail_recursive.private_seg_size))
+; CHECK: .set .Lcalls_tail_recursive.private_seg_size, 0+(max(.Ltail_recursive.private_seg_size))
define void @calls_tail_recursive() norecurse {
tail call void @tail_recursive()
ret void
}
; CHECK-LABEL: {{^}}tail_recursive_with_stack:
-; CHECK: .set tail_recursive_with_stack.private_seg_size, 8
-; CHECK: .set tail_recursive_with_stack.has_recursion, 1
+; CHECK: .set .Ltail_recursive_with_stack.private_seg_size, 8
+; CHECK: .set .Ltail_recursive_with_stack.has_recursion, 1
define void @tail_recursive_with_stack() {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
@@ -41,11 +41,11 @@ define void @tail_recursive_with_stack() {
; For an arbitrary recursive call, report a large number for unknown stack
; usage for code object v4 and older
; CHECK-LABEL: {{^}}calls_recursive:
-; CHECK: .set calls_recursive.private_seg_size, 0+(max(16384, recursive.private_seg_size))
+; CHECK: .set .Lcalls_recursive.private_seg_size, 0+(max(16384, .Lrecursive.private_seg_size))
;
; V5-LABEL: {{^}}calls_recursive:
-; V5: .set calls_recursive.private_seg_size, 0+(max(recursive.private_seg_size))
-; V5: .set calls_recursive.has_dyn_sized_stack, or(0, recursive.has_dyn_sized_stack)
+; V5: .set .Lcalls_recursive.private_seg_size, 0+(max(.Lrecursive.private_seg_size))
+; V5: .set .Lcalls_recursive.has_dyn_sized_stack, or(0, .Lrecursive.has_dyn_sized_stack)
define amdgpu_kernel void @calls_recursive() {
call void @recursive()
ret void
@@ -54,7 +54,7 @@ define amdgpu_kernel void @calls_recursive() {
; Make sure we do not report a huge stack size for tail recursive
; functions
; CHECK-LABEL: {{^}}kernel_indirectly_calls_tail_recursive:
-; CHECK: .set kernel_indirectly_calls_tail_recursive.private_seg_size, 0+(max(calls_tail_recursive.private_seg_size))
+; CHECK: .set .Lkernel_indirectly_calls_tail_recursive.private_seg_size, 0+(max(.Lcalls_tail_recursive.private_seg_size))
define amdgpu_kernel void @kernel_indirectly_calls_tail_recursive() {
call void @calls_tail_recursive()
ret void
@@ -65,22 +65,22 @@ define amdgpu_kernel void @kernel_indirectly_calls_tail_recursive() {
; in the kernel.
; CHECK-LABEL: {{^}}kernel_calls_tail_recursive:
-; CHECK: .set kernel_calls_tail_recursive.private_seg_size, 0+(max(16384, tail_recursive.private_seg_size))
+; CHECK: .set .Lkernel_calls_tail_recursive.private_seg_size, 0+(max(16384, .Ltail_recursive.private_seg_size))
;
; V5-LABEL: {{^}}kernel_calls_tail_recursive:
-; V5: .set kernel_calls_tail_recursive.private_seg_size, 0+(max(tail_recursive.private_seg_size))
-; V5: .set kernel_calls_tail_recursive.has_recursion, or(1, tail_recursive.has_recursion)
+; V5: .set .Lkernel_calls_tail_recursive.private_seg_size, 0+(max(.Ltail_recursive.private_seg_size))
+; V5: .set .Lkernel_calls_tail_recursive.has_recursion, or(1, .Ltail_recursive.has_recursion)
define amdgpu_kernel void @kernel_calls_tail_recursive() {
call void @tail_recursive()
ret void
}
; CHECK-LABEL: {{^}}kernel_calls_tail_recursive_with_stack:
-; CHECK: .set kernel_calls_tail_recursive_with_stack.private_seg_size, 0+(max(16384, tail_recursive_with_stack.private_seg_size))
+; CHECK: .set .Lkernel_calls_tail_recursive_with_stack.private_seg_size, 0+(max(16384, .Ltail_recursive_with_stack.private_seg_size))
;
; V5-LABEL: {{^}}kernel_calls_tail_recursive_with_stack:
-; V5: .set kernel_calls_tail_recursive_with_stack.private_seg_size, 0+(max(tail_recursive_with_stack.private_seg_size))
-; V5: .set kernel_calls_tail_recursive_with_stack.has_dyn_sized_stack, or(0, tail_recursive_with_stack.has_dyn_sized_stack)
+; V5: .set .Lkernel_calls_tail_recursive_with_stack.private_seg_size, 0+(max(.Ltail_recursive_with_stack.private_seg_size))
+; V5: .set .Lkernel_calls_tail_recursive_with_stack.has_dyn_sized_stack, or(0, .Ltail_recursive_with_stack.has_dyn_sized_stack)
define amdgpu_kernel void @kernel_calls_tail_recursive_with_stack() {
call void @tail_recursive_with_stack()
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
index 8bbae59f468f1d..849b1e2a7fce43 100644
--- a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
+++ b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
@@ -141,12 +141,12 @@ define void @empty_func() !dbg !8 {
}
; STDERR: remark: foo.cl:64:0: Function Name: test_indirect_call
-; STDERR-NEXT: remark: foo.cl:64:0: TotalSGPRs: test_indirect_call.numbered_sgpr+6
-; STDERR-NEXT: remark: foo.cl:64:0: VGPRs: test_indirect_call.num_vgpr
-; STDERR-NEXT: remark: foo.cl:64:0: AGPRs: test_indirect_call.num_agpr
+; STDERR-NEXT: remark: foo.cl:64:0: TotalSGPRs: .Ltest_indirect_call.numbered_sgpr+6
+; STDERR-NEXT: remark: foo.cl:64:0: VGPRs: .Ltest_indirect_call.num_vgpr
+; STDERR-NEXT: remark: foo.cl:64:0: AGPRs: .Ltest_indirect_call.num_agpr
; STDERR-NEXT: remark: foo.cl:64:0: ScratchSize [bytes/lane]: 0
; STDERR-NEXT: remark: foo.cl:64:0: Dynamic Stack: True
-; STDERR-NEXT: remark: foo.cl:64:0: Occupancy [waves/SIMD]: occupancy(10, 4, 256, 8, 8, max(test_indirect_call.numbered_sgpr+(extrasgprs(test_indirect_call.uses_vcc, test_indirect_call.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(test_indirect_call.num_agpr, test_indirect_call.num_vgpr), 1, 0))
+; STDERR-NEXT: remark: foo.cl:64:0: Occupancy [waves/SIMD]: occupancy(10, 4, 256, 8, 8, max(.Ltest_indirect_call.numbered_sgpr+(extrasgprs(.Ltest_indirect_call.uses_vcc, .Ltest_indirect_call.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(.Ltest_indirect_call.num_agpr, .Ltest_indirect_call.num_vgpr), 1, 0))
; STDERR-NEXT: remark: foo.cl:64:0: SGPRs Spill: 0
; STDERR-NEXT: remark: foo.cl:64:0: VGPRs Spill: 0
; STDERR-NEXT: remark: foo.cl:64:0: LDS Size [bytes/block]: 0
@@ -159,12 +159,12 @@ define amdgpu_kernel void @test_indirect_call() !dbg !9 {
}
; STDERR: remark: foo.cl:74:0: Function Name: test_indirect_w_static_stack
-; STDERR-NEXT: remark: foo.cl:74:0: TotalSGPRs: test_indirect_w_static_stack.numbered_sgpr+6
-; STDERR-NEXT: remark: foo.cl:74:0: VGPRs: test_indirect_w_static_stack.num_vgpr
-; STDERR-NEXT: remark: foo.cl:74:0: AGPRs: test_indirect_w_static_stack.num_agpr
+; STDERR-NEXT: remark: foo.cl:74:0: TotalSGPRs: .Ltest_indirect_w_static_stack.numbered_sgpr+6
+; STDERR-NEXT: remark: foo.cl:74:0: VGPRs: .Ltest_indirect_w_static_stack.num_vgpr
+; STDERR-NEXT: remark: foo.cl:74:0: AGPRs: .Ltest_indirect_w_static_stack.num_agpr
; STDERR-NEXT: remark: foo.cl:74:0: ScratchSize [bytes/lane]: 144
; STDERR-NEXT: remark: foo.cl:74:0: Dynamic Stack: True
-; STDERR-NEXT: remark: foo.cl:74:0: Occupancy [waves/SIMD]: occupancy(10, 4, 256, 8, 8, max(test_indirect_w_static_stack.numbered_sgpr+(extrasgprs(test_indirect_w_static_stack.uses_vcc, test_indirect_w_static_stack.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(test_indirect_w_static_stack.num_agpr, test_indirect_w_static_stack.num_vgpr), 1, 0))
+; STDERR-NEXT: remark: foo.cl:74:0: Occupancy [waves/SIMD]: occupancy(10, 4, 256, 8, 8, max(.Ltest_indirect_w_static_stack.numbered_sgpr+(extrasgprs(.Ltest_indirect_w_static_stack.uses_vcc, .Ltest_indirect_w_static_stack.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(.Ltest_indirect_w_static_stack.num_agpr, .Ltest_indirect_w_static_stack.num_vgpr), 1, 0))
; STDERR-NEXT: remark: foo.cl:74:0: SGPRs Spill: 0
; STDERR-NEXT: remark: foo.cl:74:0: VGPRs Spill: 0
; STDERR-NEXT: remark: foo.cl:74:0: LDS Size [bytes/block]: 0
diff --git a/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll b/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll
index 5d5aad76afd095..bd7b473fd806f2 100644
--- a/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll
+++ b/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll
@@ -23,8 +23,8 @@ define internal fastcc void @unreachable() {
; GCN-NOT: .amdhsa_uses_dynamic_stack
; GCN-V5: .amdhsa_uses_dynamic_stack
-; ALL: .set entry.private_seg_size, 0
-; ALL: .set entry.has_dyn_sized_stack, 0
+; ALL: .set .Lentry.private_seg_size, 0
+; ALL: .set .Lentry.has_dyn_sized_stack, 0
define amdgpu_kernel void @entry() {
bb0:
br i1 false, label %bb1, label %bb2
>From 78c6218883e5e090cd1ebbc2cd3b88b8b5bc56e0 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Mon, 20 Jan 2025 20:17:01 +0000
Subject: [PATCH 2/2] Localize symbols according to function linkage
---
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 60 ++-
.../Target/AMDGPU/AMDGPUMCResourceInfo.cpp | 60 ++-
llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.h | 4 +-
.../CodeGen/AMDGPU/agpr-register-count.ll | 34 +-
.../amdpal-metadata-agpr-register-count.ll | 6 +-
...-amdgpu-flat-work-group-size-vgpr-limit.ll | 56 +-
.../AMDGPU/call-alias-register-usage-agpr.ll | 12 +-
.../AMDGPU/call-alias-register-usage0.ll | 6 +-
.../AMDGPU/call-alias-register-usage1.ll | 10 +-
.../AMDGPU/call-alias-register-usage2.ll | 10 +-
.../AMDGPU/call-alias-register-usage3.ll | 10 +-
.../AMDGPU/call-graph-register-usage.ll | 30 +-
.../CodeGen/AMDGPU/function-resource-usage.ll | 488 +++++++++---------
llvm/test/CodeGen/AMDGPU/recursion.ll | 32 +-
.../AMDGPU/resource-optimization-remarks.ll | 16 +-
.../AMDGPU/resource-usage-dead-function.ll | 4 +-
16 files changed, 420 insertions(+), 418 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 737b2f740d6f77..6ff1c74d2da537 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -357,6 +357,7 @@ bool AMDGPUAsmPrinter::doInitialization(Module &M) {
}
void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
+ bool isLocal = F.hasLocalLinkage();
if (F.isDeclaration() || !AMDGPU::isModuleEntryFunctionCC(F.getCallingConv()))
return;
@@ -375,8 +376,8 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
const uint64_t MaxScratchPerWorkitem =
STM.getMaxWaveScratchSize() / STM.getWavefrontSize();
- MCSymbol *ScratchSizeSymbol =
- RI.getSymbol(FnSym->getName(), RIK::RIK_PrivateSegSize, OutContext);
+ MCSymbol *ScratchSizeSymbol = RI.getSymbol(
+ FnSym->getName(), RIK::RIK_PrivateSegSize, OutContext, isLocal);
uint64_t ScratchSize;
if (ScratchSizeSymbol->isVariable() &&
TryGetMCExprValue(ScratchSizeSymbol->getVariableValue(), ScratchSize) &&
@@ -389,7 +390,7 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
// Validate addressable scalar registers (i.e., prior to added implicit
// SGPRs).
MCSymbol *NumSGPRSymbol =
- RI.getSymbol(FnSym->getName(), RIK::RIK_NumSGPR, OutContext);
+ RI.getSymbol(FnSym->getName(), RIK::RIK_NumSGPR, OutContext, isLocal);
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
!STM.hasSGPRInitBug()) {
unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
@@ -406,9 +407,9 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
}
MCSymbol *VCCUsedSymbol =
- RI.getSymbol(FnSym->getName(), RIK::RIK_UsesVCC, OutContext);
- MCSymbol *FlatUsedSymbol =
- RI.getSymbol(FnSym->getName(), RIK::RIK_UsesFlatScratch, OutContext);
+ RI.getSymbol(FnSym->getName(), RIK::RIK_UsesVCC, OutContext, isLocal);
+ MCSymbol *FlatUsedSymbol = RI.getSymbol(
+ FnSym->getName(), RIK::RIK_UsesFlatScratch, OutContext, isLocal);
uint64_t VCCUsed, FlatUsed, NumSgpr;
if (NumSGPRSymbol->isVariable() && VCCUsedSymbol->isVariable() &&
@@ -435,9 +436,9 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
}
MCSymbol *NumVgprSymbol =
- RI.getSymbol(FnSym->getName(), RIK::RIK_NumVGPR, OutContext);
+ RI.getSymbol(FnSym->getName(), RIK::RIK_NumVGPR, OutContext, isLocal);
MCSymbol *NumAgprSymbol =
- RI.getSymbol(FnSym->getName(), RIK::RIK_NumAGPR, OutContext);
+ RI.getSymbol(FnSym->getName(), RIK::RIK_NumAGPR, OutContext, isLocal);
uint64_t NumVgpr, NumAgpr;
MachineModuleInfo &MMI =
@@ -655,6 +656,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
MCContext &Context = getObjFileLowering().getContext();
+ bool isLocal = MF.getFunction().hasLocalLinkage();
// FIXME: This should be an explicit check for Mesa.
if (!STM.isAmdHsaOS() && !STM.isAmdPalOS()) {
MCSectionELF *ConfigSection =
@@ -700,20 +702,24 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
{
using RIK = MCResourceInfo::ResourceInfoKind;
getTargetStreamer()->EmitMCResourceInfo(
- RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumVGPR, OutContext),
- RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumAGPR, OutContext),
- RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumSGPR, OutContext),
+ RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumVGPR, OutContext,
+ isLocal),
+ RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumAGPR, OutContext,
+ isLocal),
+ RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumSGPR, OutContext,
+ isLocal),
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_PrivateSegSize,
- OutContext),
- RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_UsesVCC, OutContext),
+ OutContext, isLocal),
+ RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_UsesVCC, OutContext,
+ isLocal),
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_UsesFlatScratch,
- OutContext),
+ OutContext, isLocal),
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasDynSizedStack,
- OutContext),
- RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasRecursion,
- OutContext),
+ OutContext, isLocal),
+ RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasRecursion, OutContext,
+ isLocal),
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasIndirectCall,
- OutContext));
+ OutContext, isLocal));
}
if (isVerbose()) {
@@ -726,19 +732,21 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutStreamer->emitRawComment(" Function info:", false);
emitCommonFunctionComments(
- RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumVGPR, OutContext)
+ RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumVGPR, OutContext,
+ isLocal)
->getVariableValue(),
- STM.hasMAIInsts() ? RI.getSymbol(CurrentFnSym->getName(),
- RIK::RIK_NumAGPR, OutContext)
- ->getVariableValue()
- : nullptr,
+ STM.hasMAIInsts()
+ ? RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumAGPR,
+ OutContext, isLocal)
+ ->getVariableValue()
+ : nullptr,
RI.createTotalNumVGPRs(MF, Ctx),
RI.createTotalNumSGPRs(
MF,
MF.getSubtarget<GCNSubtarget>().getTargetID().isXnackOnOrAny(),
Ctx),
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_PrivateSegSize,
- OutContext)
+ OutContext, isLocal)
->getVariableValue(),
getFunctionCodeSize(MF), MFI);
return false;
@@ -927,6 +935,7 @@ static const MCExpr *computeAccumOffset(const MCExpr *NumVGPR, MCContext &Ctx) {
void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
const MachineFunction &MF) {
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
+ bool isLocal = MF.getFunction().hasLocalLinkage();
MCContext &Ctx = MF.getContext();
auto CreateExpr = [&Ctx](int64_t Value) {
@@ -944,7 +953,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
auto GetSymRefExpr =
[&](MCResourceInfo::ResourceInfoKind RIK) -> const MCExpr * {
- MCSymbol *Sym = RI.getSymbol(CurrentFnSym->getName(), RIK, OutContext);
+ MCSymbol *Sym =
+ RI.getSymbol(CurrentFnSym->getName(), RIK, OutContext, isLocal);
return MCSymbolRefExpr::create(Sym, Ctx);
};
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
index f5dd67f57cbec6..1c28884ddd49ab 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
@@ -23,11 +23,12 @@
using namespace llvm;
MCSymbol *MCResourceInfo::getSymbol(StringRef FuncName, ResourceInfoKind RIK,
- MCContext &OutContext) {
- auto GOCS = [FuncName, &OutContext](StringRef Suffix) {
- return OutContext.getOrCreateSymbol(
- Twine(OutContext.getAsmInfo()->getPrivateGlobalPrefix()) + FuncName +
- Twine(Suffix));
+ MCContext &OutContext, bool isLocal) {
+ auto GOCS = [FuncName, &OutContext, isLocal](StringRef Suffix) {
+ StringRef Prefix =
+ isLocal ? OutContext.getAsmInfo()->getPrivateGlobalPrefix() : "";
+ return OutContext.getOrCreateSymbol(Twine(Prefix) + FuncName +
+ Twine(Suffix));
};
switch (RIK) {
case RIK_NumVGPR:
@@ -54,8 +55,8 @@ MCSymbol *MCResourceInfo::getSymbol(StringRef FuncName, ResourceInfoKind RIK,
const MCExpr *MCResourceInfo::getSymRefExpr(StringRef FuncName,
ResourceInfoKind RIK,
- MCContext &Ctx) {
- return MCSymbolRefExpr::create(getSymbol(FuncName, RIK, Ctx), Ctx);
+ MCContext &Ctx, bool isLocal) {
+ return MCSymbolRefExpr::create(getSymbol(FuncName, RIK, Ctx, isLocal), Ctx);
}
void MCResourceInfo::assignMaxRegs(MCContext &OutContext) {
@@ -83,21 +84,15 @@ void MCResourceInfo::finalize(MCContext &OutContext) {
}
MCSymbol *MCResourceInfo::getMaxVGPRSymbol(MCContext &OutContext) {
- StringRef PrivatePrefix = OutContext.getAsmInfo()->getPrivateGlobalPrefix();
- return OutContext.getOrCreateSymbol(Twine(PrivatePrefix) +
- "amdgpu.max_num_vgpr");
+ return OutContext.getOrCreateSymbol("amdgpu.max_num_vgpr");
}
MCSymbol *MCResourceInfo::getMaxAGPRSymbol(MCContext &OutContext) {
- StringRef PrivatePrefix = OutContext.getAsmInfo()->getPrivateGlobalPrefix();
- return OutContext.getOrCreateSymbol(Twine(PrivatePrefix) +
- "amdgpu.max_num_agpr");
+ return OutContext.getOrCreateSymbol("amdgpu.max_num_agpr");
}
MCSymbol *MCResourceInfo::getMaxSGPRSymbol(MCContext &OutContext) {
- StringRef PrivatePrefix = OutContext.getAsmInfo()->getPrivateGlobalPrefix();
- return OutContext.getOrCreateSymbol(Twine(PrivatePrefix) +
- "amdgpu.max_num_sgpr");
+ return OutContext.getOrCreateSymbol("amdgpu.max_num_sgpr");
}
void MCResourceInfo::assignResourceInfoExpr(
@@ -105,11 +100,12 @@ void MCResourceInfo::assignResourceInfoExpr(
const MachineFunction &MF, const SmallVectorImpl<const Function *> &Callees,
MCContext &OutContext) {
const TargetMachine &TM = MF.getTarget();
+ bool isLocal = MF.getFunction().hasLocalLinkage();
MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());
const MCConstantExpr *LocalConstExpr =
MCConstantExpr::create(LocalValue, OutContext);
const MCExpr *SymVal = LocalConstExpr;
- MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext);
+ MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext, isLocal);
if (!Callees.empty()) {
SmallVector<const MCExpr *, 8> ArgExprs;
SmallPtrSet<const Function *, 8> Seen;
@@ -119,9 +115,10 @@ void MCResourceInfo::assignResourceInfoExpr(
if (!Seen.insert(Callee).second)
continue;
+ bool isCalleeLocal = Callee->hasLocalLinkage();
MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction());
MCSymbol *CalleeValSym =
- getSymbol(CalleeFnSym->getName(), RIK, OutContext);
+ getSymbol(CalleeFnSym->getName(), RIK, OutContext, isCalleeLocal);
// Avoid constructing recursive definitions by detecting whether `Sym` is
// found transitively within any of its `CalleeValSym`.
@@ -164,6 +161,7 @@ void MCResourceInfo::gatherResourceInfo(
MCSymbol *MaxVGPRSym = getMaxVGPRSymbol(OutContext);
MCSymbol *MaxAGPRSym = getMaxAGPRSymbol(OutContext);
MCSymbol *MaxSGPRSym = getMaxSGPRSymbol(OutContext);
+ bool isLocal = MF.getFunction().hasLocalLinkage();
if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())) {
addMaxVGPRCandidate(FRI.NumVGPR);
@@ -181,7 +179,8 @@ void MCResourceInfo::gatherResourceInfo(
FRI.Callees, OutContext);
} else {
const MCExpr *SymRef = MCSymbolRefExpr::create(MaxSym, OutContext);
- MCSymbol *LocalNumSym = getSymbol(FnSym->getName(), RIK, OutContext);
+ MCSymbol *LocalNumSym =
+ getSymbol(FnSym->getName(), RIK, OutContext, isLocal);
const MCExpr *MaxWithLocal = AMDGPUMCExpr::createMax(
{MCConstantExpr::create(numRegs, OutContext), SymRef}, OutContext);
LocalNumSym->setVariableValue(MaxWithLocal);
@@ -196,7 +195,8 @@ void MCResourceInfo::gatherResourceInfo(
// The expression for private segment size should be: FRI.PrivateSegmentSize
// + max(FRI.Callees, FRI.CalleeSegmentSize)
SmallVector<const MCExpr *, 8> ArgExprs;
- MCSymbol *Sym = getSymbol(FnSym->getName(), RIK_PrivateSegSize, OutContext);
+ MCSymbol *Sym =
+ getSymbol(FnSym->getName(), RIK_PrivateSegSize, OutContext, isLocal);
if (FRI.CalleeSegmentSize)
ArgExprs.push_back(
MCConstantExpr::create(FRI.CalleeSegmentSize, OutContext));
@@ -207,9 +207,11 @@ void MCResourceInfo::gatherResourceInfo(
if (!Seen.insert(Callee).second)
continue;
if (!Callee->isDeclaration()) {
+ bool isCalleeLocal = Callee->hasLocalLinkage();
MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction());
MCSymbol *CalleeValSym =
- getSymbol(CalleeFnSym->getName(), RIK_PrivateSegSize, OutContext);
+ getSymbol(CalleeFnSym->getName(), RIK_PrivateSegSize, OutContext,
+ isCalleeLocal);
// Avoid constructing recursive definitions by detecting whether `Sym`
// is found transitively within any of its `CalleeValSym`.
@@ -232,7 +234,7 @@ void MCResourceInfo::gatherResourceInfo(
}
auto SetToLocal = [&](int64_t LocalValue, ResourceInfoKind RIK) {
- MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext);
+ MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext, isLocal);
Sym->setVariableValue(MCConstantExpr::create(LocalValue, OutContext));
};
@@ -264,9 +266,10 @@ const MCExpr *MCResourceInfo::createTotalNumVGPRs(const MachineFunction &MF,
MCContext &Ctx) {
const TargetMachine &TM = MF.getTarget();
MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());
+ bool isLocal = MF.getFunction().hasLocalLinkage();
return AMDGPUMCExpr::createTotalNumVGPR(
- getSymRefExpr(FnSym->getName(), RIK_NumAGPR, Ctx),
- getSymRefExpr(FnSym->getName(), RIK_NumVGPR, Ctx), Ctx);
+ getSymRefExpr(FnSym->getName(), RIK_NumAGPR, Ctx, isLocal),
+ getSymRefExpr(FnSym->getName(), RIK_NumVGPR, Ctx, isLocal), Ctx);
}
const MCExpr *MCResourceInfo::createTotalNumSGPRs(const MachineFunction &MF,
@@ -274,11 +277,12 @@ const MCExpr *MCResourceInfo::createTotalNumSGPRs(const MachineFunction &MF,
MCContext &Ctx) {
const TargetMachine &TM = MF.getTarget();
MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());
+ bool isLocal = MF.getFunction().hasLocalLinkage();
return MCBinaryExpr::createAdd(
- getSymRefExpr(FnSym->getName(), RIK_NumSGPR, Ctx),
+ getSymRefExpr(FnSym->getName(), RIK_NumSGPR, Ctx, isLocal),
AMDGPUMCExpr::createExtraSGPRs(
- getSymRefExpr(FnSym->getName(), RIK_UsesVCC, Ctx),
- getSymRefExpr(FnSym->getName(), RIK_UsesFlatScratch, Ctx), hasXnack,
- Ctx),
+ getSymRefExpr(FnSym->getName(), RIK_UsesVCC, Ctx, isLocal),
+ getSymRefExpr(FnSym->getName(), RIK_UsesFlatScratch, Ctx, isLocal),
+ hasXnack, Ctx),
Ctx);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.h
index 9dc34100e644e6..ba11062336e14a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.h
@@ -71,9 +71,9 @@ class MCResourceInfo {
}
MCSymbol *getSymbol(StringRef FuncName, ResourceInfoKind RIK,
- MCContext &OutContext);
+ MCContext &OutContext, bool isLocal);
const MCExpr *getSymRefExpr(StringRef FuncName, ResourceInfoKind RIK,
- MCContext &Ctx);
+ MCContext &Ctx, bool isLocal);
void reset();
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll b/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
index b1dd4fecab2cb9..0e16ea10c019ac 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
@@ -154,28 +154,28 @@ bb:
declare void @undef_func()
; GCN-LABEL: {{^}}kernel_call_undef_func:
-; GCN: .amdhsa_next_free_vgpr max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0)
-; GFX90A: .amdhsa_accum_offset ((((((alignto(max(1, .Lkernel_call_undef_func.num_vgpr), 4))/4)-1)&(~65536))&63)+1)*4
-; GCN: .set .Lkernel_call_undef_func.num_vgpr, max(32, .Lamdgpu.max_num_vgpr)
-; GCN: .set .Lkernel_call_undef_func.num_agpr, max(0, .Lamdgpu.max_num_agpr)
-; GCN: NumVgprs: .Lkernel_call_undef_func.num_vgpr
-; GCN: NumAgprs: .Lkernel_call_undef_func.num_agpr
-; GCN: TotalNumVgprs: totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr)
-; GFX908: VGPRBlocks: ((alignto(max(max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0), 1), 4))/4)-1
-; GFX90A: VGPRBlocks: ((alignto(max(max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0), 1), 8))/8)-1
-; GCN: NumVGPRsForWavesPerEU: max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0)
-; GFX90A: AccumOffset: ((((alignto(max(1, .Lkernel_call_undef_func.num_vgpr), 4))/4)-1)+1)*4
-; GFX908: Occupancy: occupancy(10, 4, 256, 8, 10, max(.Lkernel_call_undef_func.numbered_sgpr+(extrasgprs(.Lkernel_call_undef_func.uses_vcc, .Lkernel_call_undef_func.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0))
-; GFX90A: Occupancy: occupancy(8, 8, 512, 8, 8, max(.Lkernel_call_undef_func.numbered_sgpr+(extrasgprs(.Lkernel_call_undef_func.uses_vcc, .Lkernel_call_undef_func.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0))
-; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: ((((alignto(max(1, .Lkernel_call_undef_func.num_vgpr), 4))/4)-1)&(~65536))&63
+; GCN: .amdhsa_next_free_vgpr max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0)
+; GFX90A: .amdhsa_accum_offset ((((((alignto(max(1, kernel_call_undef_func.num_vgpr), 4))/4)-1)&(~65536))&63)+1)*4
+; GCN: .set kernel_call_undef_func.num_vgpr, max(32, amdgpu.max_num_vgpr)
+; GCN: .set kernel_call_undef_func.num_agpr, max(0, amdgpu.max_num_agpr)
+; GCN: NumVgprs: kernel_call_undef_func.num_vgpr
+; GCN: NumAgprs: kernel_call_undef_func.num_agpr
+; GCN: TotalNumVgprs: totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr)
+; GFX908: VGPRBlocks: ((alignto(max(max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0), 1), 4))/4)-1
+; GFX90A: VGPRBlocks: ((alignto(max(max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0), 1), 8))/8)-1
+; GCN: NumVGPRsForWavesPerEU: max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0)
+; GFX90A: AccumOffset: ((((alignto(max(1, kernel_call_undef_func.num_vgpr), 4))/4)-1)+1)*4
+; GFX908: Occupancy: occupancy(10, 4, 256, 8, 10, max(kernel_call_undef_func.numbered_sgpr+(extrasgprs(kernel_call_undef_func.uses_vcc, kernel_call_undef_func.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0))
+; GFX90A: Occupancy: occupancy(8, 8, 512, 8, 8, max(kernel_call_undef_func.numbered_sgpr+(extrasgprs(kernel_call_undef_func.uses_vcc, kernel_call_undef_func.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0))
+; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: ((((alignto(max(1, kernel_call_undef_func.num_vgpr), 4))/4)-1)&(~65536))&63
define amdgpu_kernel void @kernel_call_undef_func() #0 {
bb:
call void @undef_func()
ret void
}
-; GCN: .set .Lamdgpu.max_num_vgpr, 32
-; GCN-NEXT: .set .Lamdgpu.max_num_agpr, 32
-; GCN-NEXT: .set .Lamdgpu.max_num_sgpr, 34
+; GCN: .set amdgpu.max_num_vgpr, 32
+; GCN-NEXT: .set amdgpu.max_num_agpr, 32
+; GCN-NEXT: .set amdgpu.max_num_sgpr, 34
attributes #0 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll b/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll
index 15284ad45a9261..8f4cb364751d88 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll
@@ -60,9 +60,9 @@ bb:
declare void @undef_func()
; CHECK: .type kernel_call_undef_func
-; CHECK: .set .Lkernel_call_undef_func.num_agpr, max(0, .Lamdgpu.max_num_agpr)
-; CHECK: NumAgprs: .Lkernel_call_undef_func.num_agpr
-; CHECK: .set .Lamdgpu.max_num_agpr, 32
+; CHECK: .set kernel_call_undef_func.num_agpr, max(0, amdgpu.max_num_agpr)
+; CHECK: NumAgprs: kernel_call_undef_func.num_agpr
+; CHECK: .set amdgpu.max_num_agpr, 32
define amdgpu_kernel void @kernel_call_undef_func() #0 {
bb:
call void @undef_func()
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll
index 374fd32ec5997f..d45e116beb4e3e 100644
--- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll
@@ -547,20 +547,20 @@ define amdgpu_kernel void @f256() #256 {
attributes #256 = { nounwind "amdgpu-flat-work-group-size"="256,256" }
; GCN-LABEL: {{^}}f512:
-; GFX9: .set .Lf512.num_vgpr, max(128, .Lamdgpu.max_num_vgpr)
-; GFX90A: .set .Lf512.num_vgpr, max(128, .Lamdgpu.max_num_vgpr)
-; GFX90A: .set .Lf512.num_agpr, max(128, .Lamdgpu.max_num_agpr)
-; GFX10WGP-WAVE32: .set .Lf512.num_vgpr, max(256, .Lamdgpu.max_num_vgpr)
-; GFX10WGP-WAVE64: .set .Lf512.num_vgpr, max(256, .Lamdgpu.max_num_vgpr)
-; GFX10CU-WAVE32: .set .Lf512.num_vgpr, max(128, .Lamdgpu.max_num_vgpr)
-; GFX10CU-WAVE64: .set .Lf512.num_vgpr, max(128, .Lamdgpu.max_num_vgpr)
-; GFX11WGP-WAVE32: .set .Lf512.num_vgpr, max(256, .Lamdgpu.max_num_vgpr)
-; GFX11WGP-WAVE64: .set .Lf512.num_vgpr, max(256, .Lamdgpu.max_num_vgpr)
-; GFX11CU-WAVE32: .set .Lf512.num_vgpr, max(192, .Lamdgpu.max_num_vgpr)
-; GFX11CU-WAVE64: .set .Lf512.num_vgpr, max(192, .Lamdgpu.max_num_vgpr)
-; GCN: NumVgprs: .Lf512.num_vgpr
-; GFX90A: NumAgprs: .Lf512.num_agpr
-; GFX90A: TotalNumVgprs: totalnumvgprs(.Lf512.num_agpr, .Lf512.num_vgpr)
+; GFX9: .set f512.num_vgpr, max(128, amdgpu.max_num_vgpr)
+; GFX90A: .set f512.num_vgpr, max(128, amdgpu.max_num_vgpr)
+; GFX90A: .set f512.num_agpr, max(128, amdgpu.max_num_agpr)
+; GFX10WGP-WAVE32: .set f512.num_vgpr, max(256, amdgpu.max_num_vgpr)
+; GFX10WGP-WAVE64: .set f512.num_vgpr, max(256, amdgpu.max_num_vgpr)
+; GFX10CU-WAVE32: .set f512.num_vgpr, max(128, amdgpu.max_num_vgpr)
+; GFX10CU-WAVE64: .set f512.num_vgpr, max(128, amdgpu.max_num_vgpr)
+; GFX11WGP-WAVE32: .set f512.num_vgpr, max(256, amdgpu.max_num_vgpr)
+; GFX11WGP-WAVE64: .set f512.num_vgpr, max(256, amdgpu.max_num_vgpr)
+; GFX11CU-WAVE32: .set f512.num_vgpr, max(192, amdgpu.max_num_vgpr)
+; GFX11CU-WAVE64: .set f512.num_vgpr, max(192, amdgpu.max_num_vgpr)
+; GCN: NumVgprs: f512.num_vgpr
+; GFX90A: NumAgprs: f512.num_agpr
+; GFX90A: TotalNumVgprs: totalnumvgprs(f512.num_agpr, f512.num_vgpr)
define amdgpu_kernel void @f512() #512 {
call void @foo()
call void @use256vgprs()
@@ -569,20 +569,20 @@ define amdgpu_kernel void @f512() #512 {
attributes #512 = { nounwind "amdgpu-flat-work-group-size"="512,512" }
; GCN-LABEL: {{^}}f1024:
-; GFX9: .set .Lf1024.num_vgpr, max(64, .Lamdgpu.max_num_vgpr)
-; GFX90A: .set .Lf1024.num_vgpr, max(64, .Lamdgpu.max_num_vgpr)
-; GFX90A: .set .Lf1024.num_agpr, max(64, .Lamdgpu.max_num_agpr)
-; GFX10WGP-WAVE32: .set .Lf1024.num_vgpr, max(128, .Lamdgpu.max_num_vgpr)
-; GFX10WGP-WAVE64: .set .Lf1024.num_vgpr, max(128, .Lamdgpu.max_num_vgpr)
-; GFX10CU-WAVE32: .set .Lf1024.num_vgpr, max(64, .Lamdgpu.max_num_vgpr)
-; GFX10CU-WAVE64: .set .Lf1024.num_vgpr, max(64, .Lamdgpu.max_num_vgpr)
-; GFX11WGP-WAVE32: .set .Lf1024.num_vgpr, max(192, .Lamdgpu.max_num_vgpr)
-; GFX11WGP-WAVE64: .set .Lf1024.num_vgpr, max(192, .Lamdgpu.max_num_vgpr)
-; GFX11CU-WAVE32: .set .Lf1024.num_vgpr, max(96, .Lamdgpu.max_num_vgpr)
-; GFX11CU-WAVE64: .set .Lf1024.num_vgpr, max(96, .Lamdgpu.max_num_vgpr)
-; GCN: NumVgprs: .Lf1024.num_vgpr
-; GFX90A: NumAgprs: .Lf1024.num_agpr
-; GFX90A: TotalNumVgprs: totalnumvgprs(.Lf1024.num_agpr, .Lf1024.num_vgpr)
+; GFX9: .set f1024.num_vgpr, max(64, amdgpu.max_num_vgpr)
+; GFX90A: .set f1024.num_vgpr, max(64, amdgpu.max_num_vgpr)
+; GFX90A: .set f1024.num_agpr, max(64, amdgpu.max_num_agpr)
+; GFX10WGP-WAVE32: .set f1024.num_vgpr, max(128, amdgpu.max_num_vgpr)
+; GFX10WGP-WAVE64: .set f1024.num_vgpr, max(128, amdgpu.max_num_vgpr)
+; GFX10CU-WAVE32: .set f1024.num_vgpr, max(64, amdgpu.max_num_vgpr)
+; GFX10CU-WAVE64: .set f1024.num_vgpr, max(64, amdgpu.max_num_vgpr)
+; GFX11WGP-WAVE32: .set f1024.num_vgpr, max(192, amdgpu.max_num_vgpr)
+; GFX11WGP-WAVE64: .set f1024.num_vgpr, max(192, amdgpu.max_num_vgpr)
+; GFX11CU-WAVE32: .set f1024.num_vgpr, max(96, amdgpu.max_num_vgpr)
+; GFX11CU-WAVE64: .set f1024.num_vgpr, max(96, amdgpu.max_num_vgpr)
+; GCN: NumVgprs: f1024.num_vgpr
+; GFX90A: NumAgprs: f1024.num_agpr
+; GFX90A: TotalNumVgprs: totalnumvgprs(f1024.num_agpr, f1024.num_vgpr)
define amdgpu_kernel void @f1024() #1024 {
call void @foo()
call void @use256vgprs()
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
index 2b6cb53596c5db..1d49e005234e33 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
@@ -8,13 +8,13 @@
@alias = hidden alias void (), ptr @aliasee_default
; ALL-LABEL: {{^}}kernel:
-; ALL: .amdhsa_next_free_vgpr max(totalnumvgprs(.Lkernel.num_agpr, .Lkernel.num_vgpr), 1, 0)
-; ALL-NEXT: .amdhsa_next_free_sgpr (max(.Lkernel.numbered_sgpr+(extrasgprs(.Lkernel.uses_vcc, .Lkernel.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(.Lkernel.uses_vcc, .Lkernel.uses_flat_scratch, 1))
-; GFX90A-NEXT: .amdhsa_accum_offset ((((((alignto(max(1, .Lkernel.num_vgpr), 4))/4)-1)&(~65536))&63)+1)*4
+; ALL: .amdhsa_next_free_vgpr max(totalnumvgprs(kernel.num_agpr, kernel.num_vgpr), 1, 0)
+; ALL-NEXT: .amdhsa_next_free_sgpr (max(kernel.numbered_sgpr+(extrasgprs(kernel.uses_vcc, kernel.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(kernel.uses_vcc, kernel.uses_flat_scratch, 1))
+; GFX90A-NEXT: .amdhsa_accum_offset ((((((alignto(max(1, kernel.num_vgpr), 4))/4)-1)&(~65536))&63)+1)*4
-; ALL: .set kernel.num_vgpr, max(41, aliasee_default.num_vgpr)
-; ALL-NEXT: .set kernel.num_agpr, max(0, aliasee_default.num_agpr)
-; ALL-NEXT: .set kernel.numbered_sgpr, max(33, aliasee_default.numbered_sgpr)
+; ALL: .set kernel.num_vgpr, max(41, .Laliasee_default.num_vgpr)
+; ALL-NEXT: .set kernel.num_agpr, max(0, .Laliasee_default.num_agpr)
+; ALL-NEXT: .set kernel.numbered_sgpr, max(33, .Laliasee_default.numbered_sgpr)
define amdgpu_kernel void @kernel() #0 {
bb:
call void @alias() #2
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll
index cd14dc57191723..f719f50ef6f134 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll
@@ -7,9 +7,9 @@
@alias0 = hidden alias void (), ptr @aliasee_default_vgpr64_sgpr102
; CHECK-LABEL: {{^}}kernel0:
-; CHECK: .set kernel0.num_vgpr, max(41, aliasee_default_vgpr64_sgpr102.num_vgpr)
-; CHECK-NEXT: .set kernel0.num_agpr, max(0, aliasee_default_vgpr64_sgpr102.num_agpr)
-; CHECK-NEXT: .set kernel0.numbered_sgpr, max(33, aliasee_default_vgpr64_sgpr102.numbered_sgpr)
+; CHECK: .set kernel0.num_vgpr, max(41, .Laliasee_default_vgpr64_sgpr102.num_vgpr)
+; CHECK-NEXT: .set kernel0.num_agpr, max(0, .Laliasee_default_vgpr64_sgpr102.num_agpr)
+; CHECK-NEXT: .set kernel0.numbered_sgpr, max(33, .Laliasee_default_vgpr64_sgpr102.numbered_sgpr)
define amdgpu_kernel void @kernel0() #0 {
bb:
call void @alias0() #2
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
index 367bbafed55808..cbc8e7882c45ee 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
@@ -9,12 +9,12 @@
; The parent kernel has a higher VGPR usage than the possible callees.
; CHECK-LABEL: {{^}}kernel1:
-; CHECK: .amdhsa_next_free_vgpr max(totalnumvgprs(.Lkernel1.num_agpr, .Lkernel1.num_vgpr), 1, 0)
-; CHECK-NEXT: .amdhsa_next_free_sgpr (max(.Lkernel1.numbered_sgpr+(extrasgprs(.Lkernel1.uses_vcc, .Lkernel1.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(.Lkernel1.uses_vcc, .Lkernel1.uses_flat_scratch, 1))
+; CHECK: .amdhsa_next_free_vgpr max(totalnumvgprs(kernel1.num_agpr, kernel1.num_vgpr), 1, 0)
+; CHECK-NEXT: .amdhsa_next_free_sgpr (max(kernel1.numbered_sgpr+(extrasgprs(kernel1.uses_vcc, kernel1.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(kernel1.uses_vcc, kernel1.uses_flat_scratch, 1))
-; CHECK: .set kernel1.num_vgpr, max(42, aliasee_vgpr32_sgpr76.num_vgpr)
-; CHECK-NEXT: .set kernel1.num_agpr, max(0, aliasee_vgpr32_sgpr76.num_agpr)
-; CHECK-NEXT: .set kernel1.numbered_sgpr, max(33, aliasee_vgpr32_sgpr76.numbered_sgpr)
+; CHECK: .set kernel1.num_vgpr, max(42, .Laliasee_vgpr32_sgpr76.num_vgpr)
+; CHECK-NEXT: .set kernel1.num_agpr, max(0, .Laliasee_vgpr32_sgpr76.num_agpr)
+; CHECK-NEXT: .set kernel1.numbered_sgpr, max(33, .Laliasee_vgpr32_sgpr76.numbered_sgpr)
define amdgpu_kernel void @kernel1() #0 {
bb:
call void asm sideeffect "; clobber v40 ", "~{v40}"()
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll
index c0b28661f62067..cdefbab93c62d9 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll
@@ -7,12 +7,12 @@
@alias2 = hidden alias void (), ptr @aliasee_vgpr64_sgpr102
; CHECK-LABEL: {{^}}kernel2:
-; CHECK: .amdhsa_next_free_vgpr max(totalnumvgprs(.Lkernel2.num_agpr, .Lkernel2.num_vgpr), 1, 0)
-; CHECK-NEXT: .amdhsa_next_free_sgpr (max(.Lkernel2.numbered_sgpr+(extrasgprs(.Lkernel2.uses_vcc, .Lkernel2.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(.Lkernel2.uses_vcc, .Lkernel2.uses_flat_scratch, 1))
+; CHECK: .amdhsa_next_free_vgpr max(totalnumvgprs(kernel2.num_agpr, kernel2.num_vgpr), 1, 0)
+; CHECK-NEXT: .amdhsa_next_free_sgpr (max(kernel2.numbered_sgpr+(extrasgprs(kernel2.uses_vcc, kernel2.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(kernel2.uses_vcc, kernel2.uses_flat_scratch, 1))
-; CHECK: .set kernel2.num_vgpr, max(41, aliasee_vgpr64_sgpr102.num_vgpr)
-; CHECK-NEXT: .set kernel2.num_agpr, max(0, aliasee_vgpr64_sgpr102.num_agpr)
-; CHECK-NEXT: .set kernel2.numbered_sgpr, max(33, aliasee_vgpr64_sgpr102.numbered_sgpr)
+; CHECK: .set kernel2.num_vgpr, max(41, .Laliasee_vgpr64_sgpr102.num_vgpr)
+; CHECK-NEXT: .set kernel2.num_agpr, max(0, .Laliasee_vgpr64_sgpr102.num_agpr)
+; CHECK-NEXT: .set kernel2.numbered_sgpr, max(33, .Laliasee_vgpr64_sgpr102.numbered_sgpr)
define amdgpu_kernel void @kernel2() #0 {
bb:
call void @alias2() #2
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll
index dd17b099804b67..43dd0a7233604e 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll
@@ -7,12 +7,12 @@
@alias3 = hidden alias void (), ptr @aliasee_vgpr256_sgpr102
; CHECK-LABEL: {{^}}kernel3:
-; CHECK: .amdhsa_next_free_vgpr max(totalnumvgprs(.Lkernel3.num_agpr, .Lkernel3.num_vgpr), 1, 0)
-; CHECK-NEXT: .amdhsa_next_free_sgpr (max(.Lkernel3.numbered_sgpr+(extrasgprs(.Lkernel3.uses_vcc, .Lkernel3.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(.Lkernel3.uses_vcc, .Lkernel3.uses_flat_scratch, 1))
+; CHECK: .amdhsa_next_free_vgpr max(totalnumvgprs(kernel3.num_agpr, kernel3.num_vgpr), 1, 0)
+; CHECK-NEXT: .amdhsa_next_free_sgpr (max(kernel3.numbered_sgpr+(extrasgprs(kernel3.uses_vcc, kernel3.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(kernel3.uses_vcc, kernel3.uses_flat_scratch, 1))
-; CHECK: .set kernel3.num_vgpr, max(41, aliasee_vgpr256_sgpr102.num_vgpr)
-; CHECK-NEXT: .set kernel3.num_agpr, max(0, aliasee_vgpr256_sgpr102.num_agpr)
-; CHECK-NEXT: .set kernel3.numbered_sgpr, max(33, aliasee_vgpr256_sgpr102.numbered_sgpr)
+; CHECK: .set kernel3.num_vgpr, max(41, .Laliasee_vgpr256_sgpr102.num_vgpr)
+; CHECK-NEXT: .set kernel3.num_agpr, max(0, .Laliasee_vgpr256_sgpr102.num_agpr)
+; CHECK-NEXT: .set kernel3.numbered_sgpr, max(33, .Laliasee_vgpr256_sgpr102.numbered_sgpr)
define amdgpu_kernel void @kernel3() #0 {
bb:
call void @alias3() #2
diff --git a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
index 61450ab655b86a..dbd00f09943c01 100644
--- a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
@@ -234,11 +234,11 @@ define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
; Make sure there's no assert when a sgpr96 is used.
; GCN-LABEL: {{^}}count_use_sgpr96_external_call
; GCN: ; sgpr96 s[{{[0-9]+}}:{{[0-9]+}}]
-; GCN: .set .Lcount_use_sgpr96_external_call.num_vgpr, max(0, .Lamdgpu.max_num_vgpr)
-; GCN: .set .Lcount_use_sgpr96_external_call.numbered_sgpr, max(33, .Lamdgpu.max_num_sgpr)
-; CI: TotalNumSgprs: .Lcount_use_sgpr96_external_call.numbered_sgpr+4
+; GCN: .set count_use_sgpr96_external_call.num_vgpr, max(0, amdgpu.max_num_vgpr)
+; GCN: .set count_use_sgpr96_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
+; CI: TotalNumSgprs: count_use_sgpr96_external_call.numbered_sgpr+4
; VI-BUG: TotalNumSgprs: 96
-; GCN: NumVgprs: .Lcount_use_sgpr96_external_call.num_vgpr
+; GCN: NumVgprs: count_use_sgpr96_external_call.num_vgpr
define amdgpu_kernel void @count_use_sgpr96_external_call() {
entry:
tail call void asm sideeffect "; sgpr96 $0", "s"(<3 x i32> <i32 10, i32 11, i32 12>) #1
@@ -249,11 +249,11 @@ entry:
; Make sure there's no assert when a sgpr160 is used.
; GCN-LABEL: {{^}}count_use_sgpr160_external_call
; GCN: ; sgpr160 s[{{[0-9]+}}:{{[0-9]+}}]
-; GCN: .set .Lcount_use_sgpr160_external_call.num_vgpr, max(0, .Lamdgpu.max_num_vgpr)
-; GCN: .set .Lcount_use_sgpr160_external_call.numbered_sgpr, max(33, .Lamdgpu.max_num_sgpr)
-; CI: TotalNumSgprs: .Lcount_use_sgpr160_external_call.numbered_sgpr+4
+; GCN: .set count_use_sgpr160_external_call.num_vgpr, max(0, amdgpu.max_num_vgpr)
+; GCN: .set count_use_sgpr160_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
+; CI: TotalNumSgprs: count_use_sgpr160_external_call.numbered_sgpr+4
; VI-BUG: TotalNumSgprs: 96
-; GCN: NumVgprs: .Lcount_use_sgpr160_external_call.num_vgpr
+; GCN: NumVgprs: count_use_sgpr160_external_call.num_vgpr
define amdgpu_kernel void @count_use_sgpr160_external_call() {
entry:
tail call void asm sideeffect "; sgpr160 $0", "s"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
@@ -264,11 +264,11 @@ entry:
; Make sure there's no assert when a vgpr160 is used.
; GCN-LABEL: {{^}}count_use_vgpr160_external_call
; GCN: ; vgpr160 v[{{[0-9]+}}:{{[0-9]+}}]
-; GCN: .set .Lcount_use_vgpr160_external_call.num_vgpr, max(5, .Lamdgpu.max_num_vgpr)
-; GCN: .set .Lcount_use_vgpr160_external_call.numbered_sgpr, max(33, .Lamdgpu.max_num_sgpr)
-; CI: TotalNumSgprs: .Lcount_use_vgpr160_external_call.numbered_sgpr+4
+; GCN: .set count_use_vgpr160_external_call.num_vgpr, max(5, amdgpu.max_num_vgpr)
+; GCN: .set count_use_vgpr160_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
+; CI: TotalNumSgprs: count_use_vgpr160_external_call.numbered_sgpr+4
; VI-BUG: TotalNumSgprs: 96
-; GCN: NumVgprs: .Lcount_use_vgpr160_external_call.num_vgpr
+; GCN: NumVgprs: count_use_vgpr160_external_call.num_vgpr
define amdgpu_kernel void @count_use_vgpr160_external_call() {
entry:
tail call void asm sideeffect "; vgpr160 $0", "v"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
@@ -276,9 +276,9 @@ entry:
ret void
}
-; GCN: .set .Lamdgpu.max_num_vgpr, 50
-; GCN: .set .Lamdgpu.max_num_agpr, 0
-; GCN: .set .Lamdgpu.max_num_sgpr, 80
+; GCN: .set amdgpu.max_num_vgpr, 50
+; GCN: .set amdgpu.max_num_agpr, 0
+; GCN: .set amdgpu.max_num_sgpr, 80
; GCN-LABEL: amdhsa.kernels:
; GCN: .name: count_use_sgpr96_external_call
diff --git a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
index d0143ac2981ca3..22257d3eba7d63 100644
--- a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
@@ -1,29 +1,17 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -filetype=obj < %s > %t
-; RUN: llvm-objdump --syms %t | FileCheck -check-prefix=OBJ %s
-
-; OBJ-NOT: num_vgpr
-; OBJ-NOT: num_agpr
-; OBJ-NOT: numbered_sgpr
-; OBJ-NOT: private_seg_size
-; OBJ-NOT: uses_vcc
-; OBJ-NOT: uses_flat_scratch
-; OBJ-NOT: has_dyn_sized_stack
-; OBJ-NOT: has_recursion
-; OBJ-NOT: has_indirect_call
; Functions that don't make calls should have constants as its resource usage as no resource information has to be propagated.
; GCN-LABEL: {{^}}use_vcc:
-; GCN: .set .Luse_vcc.num_vgpr, 0
-; GCN: .set .Luse_vcc.num_agpr, 0
-; GCN: .set .Luse_vcc.numbered_sgpr, 32
-; GCN: .set .Luse_vcc.private_seg_size, 0
-; GCN: .set .Luse_vcc.uses_vcc, 1
-; GCN: .set .Luse_vcc.uses_flat_scratch, 0
-; GCN: .set .Luse_vcc.has_dyn_sized_stack, 0
-; GCN: .set .Luse_vcc.has_recursion, 0
-; GCN: .set .Luse_vcc.has_indirect_call, 0
+; GCN: .set use_vcc.num_vgpr, 0
+; GCN: .set use_vcc.num_agpr, 0
+; GCN: .set use_vcc.numbered_sgpr, 32
+; GCN: .set use_vcc.private_seg_size, 0
+; GCN: .set use_vcc.uses_vcc, 1
+; GCN: .set use_vcc.uses_flat_scratch, 0
+; GCN: .set use_vcc.has_dyn_sized_stack, 0
+; GCN: .set use_vcc.has_recursion, 0
+; GCN: .set use_vcc.has_indirect_call, 0
; GCN: TotalNumSgprs: 36
; GCN: NumVgprs: 0
; GCN: ScratchSize: 0
@@ -33,15 +21,15 @@ define void @use_vcc() #1 {
}
; GCN-LABEL: {{^}}indirect_use_vcc:
-; GCN: .set .Lindirect_use_vcc.num_vgpr, max(41, .Luse_vcc.num_vgpr)
-; GCN: .set .Lindirect_use_vcc.num_agpr, max(0, .Luse_vcc.num_agpr)
-; GCN: .set .Lindirect_use_vcc.numbered_sgpr, max(34, .Luse_vcc.numbered_sgpr)
-; GCN: .set .Lindirect_use_vcc.private_seg_size, 16+(max(.Luse_vcc.private_seg_size))
-; GCN: .set .Lindirect_use_vcc.uses_vcc, or(1, .Luse_vcc.uses_vcc)
-; GCN: .set .Lindirect_use_vcc.uses_flat_scratch, or(0, .Luse_vcc.uses_flat_scratch)
-; GCN: .set .Lindirect_use_vcc.has_dyn_sized_stack, or(0, .Luse_vcc.has_dyn_sized_stack)
-; GCN: .set .Lindirect_use_vcc.has_recursion, or(0, .Luse_vcc.has_recursion)
-; GCN: .set .Lindirect_use_vcc.has_indirect_call, or(0, .Luse_vcc.has_indirect_call)
+; GCN: .set indirect_use_vcc.num_vgpr, max(41, use_vcc.num_vgpr)
+; GCN: .set indirect_use_vcc.num_agpr, max(0, use_vcc.num_agpr)
+; GCN: .set indirect_use_vcc.numbered_sgpr, max(34, use_vcc.numbered_sgpr)
+; GCN: .set indirect_use_vcc.private_seg_size, 16+(max(use_vcc.private_seg_size))
+; GCN: .set indirect_use_vcc.uses_vcc, or(1, use_vcc.uses_vcc)
+; GCN: .set indirect_use_vcc.uses_flat_scratch, or(0, use_vcc.uses_flat_scratch)
+; GCN: .set indirect_use_vcc.has_dyn_sized_stack, or(0, use_vcc.has_dyn_sized_stack)
+; GCN: .set indirect_use_vcc.has_recursion, or(0, use_vcc.has_recursion)
+; GCN: .set indirect_use_vcc.has_indirect_call, or(0, use_vcc.has_indirect_call)
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
@@ -51,15 +39,15 @@ define void @indirect_use_vcc() #1 {
}
; GCN-LABEL: {{^}}indirect_2level_use_vcc_kernel:
-; GCN: .set .Lindirect_2level_use_vcc_kernel.num_vgpr, max(32, .Lindirect_use_vcc.num_vgpr)
-; GCN: .set .Lindirect_2level_use_vcc_kernel.num_agpr, max(0, .Lindirect_use_vcc.num_agpr)
-; GCN: .set .Lindirect_2level_use_vcc_kernel.numbered_sgpr, max(33, .Lindirect_use_vcc.numbered_sgpr)
-; GCN: .set .Lindirect_2level_use_vcc_kernel.private_seg_size, 0+(max(.Lindirect_use_vcc.private_seg_size))
-; GCN: .set .Lindirect_2level_use_vcc_kernel.uses_vcc, or(1, .Lindirect_use_vcc.uses_vcc)
-; GCN: .set .Lindirect_2level_use_vcc_kernel.uses_flat_scratch, or(1, .Lindirect_use_vcc.uses_flat_scratch)
-; GCN: .set .Lindirect_2level_use_vcc_kernel.has_dyn_sized_stack, or(0, .Lindirect_use_vcc.has_dyn_sized_stack)
-; GCN: .set .Lindirect_2level_use_vcc_kernel.has_recursion, or(0, .Lindirect_use_vcc.has_recursion)
-; GCN: .set .Lindirect_2level_use_vcc_kernel.has_indirect_call, or(0, .Lindirect_use_vcc.has_indirect_call)
+; GCN: .set indirect_2level_use_vcc_kernel.num_vgpr, max(32, indirect_use_vcc.num_vgpr)
+; GCN: .set indirect_2level_use_vcc_kernel.num_agpr, max(0, indirect_use_vcc.num_agpr)
+; GCN: .set indirect_2level_use_vcc_kernel.numbered_sgpr, max(33, indirect_use_vcc.numbered_sgpr)
+; GCN: .set indirect_2level_use_vcc_kernel.private_seg_size, 0+(max(indirect_use_vcc.private_seg_size))
+; GCN: .set indirect_2level_use_vcc_kernel.uses_vcc, or(1, indirect_use_vcc.uses_vcc)
+; GCN: .set indirect_2level_use_vcc_kernel.uses_flat_scratch, or(1, indirect_use_vcc.uses_flat_scratch)
+; GCN: .set indirect_2level_use_vcc_kernel.has_dyn_sized_stack, or(0, indirect_use_vcc.has_dyn_sized_stack)
+; GCN: .set indirect_2level_use_vcc_kernel.has_recursion, or(0, indirect_use_vcc.has_recursion)
+; GCN: .set indirect_2level_use_vcc_kernel.has_indirect_call, or(0, indirect_use_vcc.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
@@ -69,15 +57,15 @@ define amdgpu_kernel void @indirect_2level_use_vcc_kernel(ptr addrspace(1) %out)
}
; GCN-LABEL: {{^}}use_flat_scratch:
-; GCN: .set .Luse_flat_scratch.num_vgpr, 0
-; GCN: .set .Luse_flat_scratch.num_agpr, 0
-; GCN: .set .Luse_flat_scratch.numbered_sgpr, 32
-; GCN: .set .Luse_flat_scratch.private_seg_size, 0
-; GCN: .set .Luse_flat_scratch.uses_vcc, 0
-; GCN: .set .Luse_flat_scratch.uses_flat_scratch, 1
-; GCN: .set .Luse_flat_scratch.has_dyn_sized_stack, 0
-; GCN: .set .Luse_flat_scratch.has_recursion, 0
-; GCN: .set .Luse_flat_scratch.has_indirect_call, 0
+; GCN: .set use_flat_scratch.num_vgpr, 0
+; GCN: .set use_flat_scratch.num_agpr, 0
+; GCN: .set use_flat_scratch.numbered_sgpr, 32
+; GCN: .set use_flat_scratch.private_seg_size, 0
+; GCN: .set use_flat_scratch.uses_vcc, 0
+; GCN: .set use_flat_scratch.uses_flat_scratch, 1
+; GCN: .set use_flat_scratch.has_dyn_sized_stack, 0
+; GCN: .set use_flat_scratch.has_recursion, 0
+; GCN: .set use_flat_scratch.has_indirect_call, 0
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 0
; GCN: ScratchSize: 0
@@ -87,15 +75,15 @@ define void @use_flat_scratch() #1 {
}
; GCN-LABEL: {{^}}indirect_use_flat_scratch:
-; GCN: .set .Lindirect_use_flat_scratch.num_vgpr, max(41, .Luse_flat_scratch.num_vgpr)
-; GCN: .set .Lindirect_use_flat_scratch.num_agpr, max(0, .Luse_flat_scratch.num_agpr)
-; GCN: .set .Lindirect_use_flat_scratch.numbered_sgpr, max(34, .Luse_flat_scratch.numbered_sgpr)
-; GCN: .set .Lindirect_use_flat_scratch.private_seg_size, 16+(max(.Luse_flat_scratch.private_seg_size))
-; GCN: .set .Lindirect_use_flat_scratch.uses_vcc, or(1, .Luse_flat_scratch.uses_vcc)
-; GCN: .set .Lindirect_use_flat_scratch.uses_flat_scratch, or(0, .Luse_flat_scratch.uses_flat_scratch)
-; GCN: .set .Lindirect_use_flat_scratch.has_dyn_sized_stack, or(0, .Luse_flat_scratch.has_dyn_sized_stack)
-; GCN: .set .Lindirect_use_flat_scratch.has_recursion, or(0, .Luse_flat_scratch.has_recursion)
-; GCN: .set .Lindirect_use_flat_scratch.has_indirect_call, or(0, .Luse_flat_scratch.has_indirect_call)
+; GCN: .set indirect_use_flat_scratch.num_vgpr, max(41, use_flat_scratch.num_vgpr)
+; GCN: .set indirect_use_flat_scratch.num_agpr, max(0, use_flat_scratch.num_agpr)
+; GCN: .set indirect_use_flat_scratch.numbered_sgpr, max(34, use_flat_scratch.numbered_sgpr)
+; GCN: .set indirect_use_flat_scratch.private_seg_size, 16+(max(use_flat_scratch.private_seg_size))
+; GCN: .set indirect_use_flat_scratch.uses_vcc, or(1, use_flat_scratch.uses_vcc)
+; GCN: .set indirect_use_flat_scratch.uses_flat_scratch, or(0, use_flat_scratch.uses_flat_scratch)
+; GCN: .set indirect_use_flat_scratch.has_dyn_sized_stack, or(0, use_flat_scratch.has_dyn_sized_stack)
+; GCN: .set indirect_use_flat_scratch.has_recursion, or(0, use_flat_scratch.has_recursion)
+; GCN: .set indirect_use_flat_scratch.has_indirect_call, or(0, use_flat_scratch.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
@@ -105,15 +93,15 @@ define void @indirect_use_flat_scratch() #1 {
}
; GCN-LABEL: {{^}}indirect_2level_use_flat_scratch_kernel:
-; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.num_vgpr, max(32, .Lindirect_use_flat_scratch.num_vgpr)
-; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.num_agpr, max(0, .Lindirect_use_flat_scratch.num_agpr)
-; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.numbered_sgpr, max(33, .Lindirect_use_flat_scratch.numbered_sgpr)
-; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.private_seg_size, 0+(max(.Lindirect_use_flat_scratch.private_seg_size))
-; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.uses_vcc, or(1, .Lindirect_use_flat_scratch.uses_vcc)
-; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.uses_flat_scratch, or(1, .Lindirect_use_flat_scratch.uses_flat_scratch)
-; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.has_dyn_sized_stack, or(0, .Lindirect_use_flat_scratch.has_dyn_sized_stack)
-; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.has_recursion, or(0, .Lindirect_use_flat_scratch.has_recursion)
-; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.has_indirect_call, or(0, .Lindirect_use_flat_scratch.has_indirect_call)
+; GCN: .set indirect_2level_use_flat_scratch_kernel.num_vgpr, max(32, indirect_use_flat_scratch.num_vgpr)
+; GCN: .set indirect_2level_use_flat_scratch_kernel.num_agpr, max(0, indirect_use_flat_scratch.num_agpr)
+; GCN: .set indirect_2level_use_flat_scratch_kernel.numbered_sgpr, max(33, indirect_use_flat_scratch.numbered_sgpr)
+; GCN: .set indirect_2level_use_flat_scratch_kernel.private_seg_size, 0+(max(indirect_use_flat_scratch.private_seg_size))
+; GCN: .set indirect_2level_use_flat_scratch_kernel.uses_vcc, or(1, indirect_use_flat_scratch.uses_vcc)
+; GCN: .set indirect_2level_use_flat_scratch_kernel.uses_flat_scratch, or(1, indirect_use_flat_scratch.uses_flat_scratch)
+; GCN: .set indirect_2level_use_flat_scratch_kernel.has_dyn_sized_stack, or(0, indirect_use_flat_scratch.has_dyn_sized_stack)
+; GCN: .set indirect_2level_use_flat_scratch_kernel.has_recursion, or(0, indirect_use_flat_scratch.has_recursion)
+; GCN: .set indirect_2level_use_flat_scratch_kernel.has_indirect_call, or(0, indirect_use_flat_scratch.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
@@ -123,15 +111,15 @@ define amdgpu_kernel void @indirect_2level_use_flat_scratch_kernel(ptr addrspace
}
; GCN-LABEL: {{^}}use_10_vgpr:
-; GCN: .set .Luse_10_vgpr.num_vgpr, 10
-; GCN: .set .Luse_10_vgpr.num_agpr, 0
-; GCN: .set .Luse_10_vgpr.numbered_sgpr, 32
-; GCN: .set .Luse_10_vgpr.private_seg_size, 0
-; GCN: .set .Luse_10_vgpr.uses_vcc, 0
-; GCN: .set .Luse_10_vgpr.uses_flat_scratch, 0
-; GCN: .set .Luse_10_vgpr.has_dyn_sized_stack, 0
-; GCN: .set .Luse_10_vgpr.has_recursion, 0
-; GCN: .set .Luse_10_vgpr.has_indirect_call, 0
+; GCN: .set use_10_vgpr.num_vgpr, 10
+; GCN: .set use_10_vgpr.num_agpr, 0
+; GCN: .set use_10_vgpr.numbered_sgpr, 32
+; GCN: .set use_10_vgpr.private_seg_size, 0
+; GCN: .set use_10_vgpr.uses_vcc, 0
+; GCN: .set use_10_vgpr.uses_flat_scratch, 0
+; GCN: .set use_10_vgpr.has_dyn_sized_stack, 0
+; GCN: .set use_10_vgpr.has_recursion, 0
+; GCN: .set use_10_vgpr.has_indirect_call, 0
; GCN: TotalNumSgprs: 36
; GCN: NumVgprs: 10
; GCN: ScratchSize: 0
@@ -142,15 +130,15 @@ define void @use_10_vgpr() #1 {
}
; GCN-LABEL: {{^}}indirect_use_10_vgpr:
-; GCN: .set .Lindirect_use_10_vgpr.num_vgpr, max(41, .Luse_10_vgpr.num_vgpr)
-; GCN: .set .Lindirect_use_10_vgpr.num_agpr, max(0, .Luse_10_vgpr.num_agpr)
-; GCN: .set .Lindirect_use_10_vgpr.numbered_sgpr, max(34, .Luse_10_vgpr.numbered_sgpr)
-; GCN: .set .Lindirect_use_10_vgpr.private_seg_size, 16+(max(.Luse_10_vgpr.private_seg_size))
-; GCN: .set .Lindirect_use_10_vgpr.uses_vcc, or(1, .Luse_10_vgpr.uses_vcc)
-; GCN: .set .Lindirect_use_10_vgpr.uses_flat_scratch, or(0, .Luse_10_vgpr.uses_flat_scratch)
-; GCN: .set .Lindirect_use_10_vgpr.has_dyn_sized_stack, or(0, .Luse_10_vgpr.has_dyn_sized_stack)
-; GCN: .set .Lindirect_use_10_vgpr.has_recursion, or(0, .Luse_10_vgpr.has_recursion)
-; GCN: .set .Lindirect_use_10_vgpr.has_indirect_call, or(0, .Luse_10_vgpr.has_indirect_call)
+; GCN: .set indirect_use_10_vgpr.num_vgpr, max(41, use_10_vgpr.num_vgpr)
+; GCN: .set indirect_use_10_vgpr.num_agpr, max(0, use_10_vgpr.num_agpr)
+; GCN: .set indirect_use_10_vgpr.numbered_sgpr, max(34, use_10_vgpr.numbered_sgpr)
+; GCN: .set indirect_use_10_vgpr.private_seg_size, 16+(max(use_10_vgpr.private_seg_size))
+; GCN: .set indirect_use_10_vgpr.uses_vcc, or(1, use_10_vgpr.uses_vcc)
+; GCN: .set indirect_use_10_vgpr.uses_flat_scratch, or(0, use_10_vgpr.uses_flat_scratch)
+; GCN: .set indirect_use_10_vgpr.has_dyn_sized_stack, or(0, use_10_vgpr.has_dyn_sized_stack)
+; GCN: .set indirect_use_10_vgpr.has_recursion, or(0, use_10_vgpr.has_recursion)
+; GCN: .set indirect_use_10_vgpr.has_indirect_call, or(0, use_10_vgpr.has_indirect_call)
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
@@ -160,15 +148,15 @@ define void @indirect_use_10_vgpr() #0 {
}
; GCN-LABEL: {{^}}indirect_2_level_use_10_vgpr:
-; GCN: .set .Lindirect_2_level_use_10_vgpr.num_vgpr, max(32, .Lindirect_use_10_vgpr.num_vgpr)
-; GCN: .set .Lindirect_2_level_use_10_vgpr.num_agpr, max(0, .Lindirect_use_10_vgpr.num_agpr)
-; GCN: .set .Lindirect_2_level_use_10_vgpr.numbered_sgpr, max(33, .Lindirect_use_10_vgpr.numbered_sgpr)
-; GCN: .set .Lindirect_2_level_use_10_vgpr.private_seg_size, 0+(max(.Lindirect_use_10_vgpr.private_seg_size))
-; GCN: .set .Lindirect_2_level_use_10_vgpr.uses_vcc, or(1, .Lindirect_use_10_vgpr.uses_vcc)
-; GCN: .set .Lindirect_2_level_use_10_vgpr.uses_flat_scratch, or(1, .Lindirect_use_10_vgpr.uses_flat_scratch)
-; GCN: .set .Lindirect_2_level_use_10_vgpr.has_dyn_sized_stack, or(0, .Lindirect_use_10_vgpr.has_dyn_sized_stack)
-; GCN: .set .Lindirect_2_level_use_10_vgpr.has_recursion, or(0, .Lindirect_use_10_vgpr.has_recursion)
-; GCN: .set .Lindirect_2_level_use_10_vgpr.has_indirect_call, or(0, .Lindirect_use_10_vgpr.has_indirect_call)
+; GCN: .set indirect_2_level_use_10_vgpr.num_vgpr, max(32, indirect_use_10_vgpr.num_vgpr)
+; GCN: .set indirect_2_level_use_10_vgpr.num_agpr, max(0, indirect_use_10_vgpr.num_agpr)
+; GCN: .set indirect_2_level_use_10_vgpr.numbered_sgpr, max(33, indirect_use_10_vgpr.numbered_sgpr)
+; GCN: .set indirect_2_level_use_10_vgpr.private_seg_size, 0+(max(indirect_use_10_vgpr.private_seg_size))
+; GCN: .set indirect_2_level_use_10_vgpr.uses_vcc, or(1, indirect_use_10_vgpr.uses_vcc)
+; GCN: .set indirect_2_level_use_10_vgpr.uses_flat_scratch, or(1, indirect_use_10_vgpr.uses_flat_scratch)
+; GCN: .set indirect_2_level_use_10_vgpr.has_dyn_sized_stack, or(0, indirect_use_10_vgpr.has_dyn_sized_stack)
+; GCN: .set indirect_2_level_use_10_vgpr.has_recursion, or(0, indirect_use_10_vgpr.has_recursion)
+; GCN: .set indirect_2_level_use_10_vgpr.has_indirect_call, or(0, indirect_use_10_vgpr.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
@@ -178,15 +166,15 @@ define amdgpu_kernel void @indirect_2_level_use_10_vgpr() #0 {
}
; GCN-LABEL: {{^}}use_50_vgpr:
-; GCN: .set .Luse_50_vgpr.num_vgpr, 50
-; GCN: .set .Luse_50_vgpr.num_agpr, 0
-; GCN: .set .Luse_50_vgpr.numbered_sgpr, 32
-; GCN: .set .Luse_50_vgpr.private_seg_size, 0
-; GCN: .set .Luse_50_vgpr.uses_vcc, 0
-; GCN: .set .Luse_50_vgpr.uses_flat_scratch, 0
-; GCN: .set .Luse_50_vgpr.has_dyn_sized_stack, 0
-; GCN: .set .Luse_50_vgpr.has_recursion, 0
-; GCN: .set .Luse_50_vgpr.has_indirect_call, 0
+; GCN: .set use_50_vgpr.num_vgpr, 50
+; GCN: .set use_50_vgpr.num_agpr, 0
+; GCN: .set use_50_vgpr.numbered_sgpr, 32
+; GCN: .set use_50_vgpr.private_seg_size, 0
+; GCN: .set use_50_vgpr.uses_vcc, 0
+; GCN: .set use_50_vgpr.uses_flat_scratch, 0
+; GCN: .set use_50_vgpr.has_dyn_sized_stack, 0
+; GCN: .set use_50_vgpr.has_recursion, 0
+; GCN: .set use_50_vgpr.has_indirect_call, 0
; GCN: TotalNumSgprs: 36
; GCN: NumVgprs: 50
; GCN: ScratchSize: 0
@@ -196,15 +184,15 @@ define void @use_50_vgpr() #1 {
}
; GCN-LABEL: {{^}}indirect_use_50_vgpr:
-; GCN: .set .Lindirect_use_50_vgpr.num_vgpr, max(41, .Luse_50_vgpr.num_vgpr)
-; GCN: .set .Lindirect_use_50_vgpr.num_agpr, max(0, .Luse_50_vgpr.num_agpr)
-; GCN: .set .Lindirect_use_50_vgpr.numbered_sgpr, max(34, .Luse_50_vgpr.numbered_sgpr)
-; GCN: .set .Lindirect_use_50_vgpr.private_seg_size, 16+(max(.Luse_50_vgpr.private_seg_size))
-; GCN: .set .Lindirect_use_50_vgpr.uses_vcc, or(1, .Luse_50_vgpr.uses_vcc)
-; GCN: .set .Lindirect_use_50_vgpr.uses_flat_scratch, or(0, .Luse_50_vgpr.uses_flat_scratch)
-; GCN: .set .Lindirect_use_50_vgpr.has_dyn_sized_stack, or(0, .Luse_50_vgpr.has_dyn_sized_stack)
-; GCN: .set .Lindirect_use_50_vgpr.has_recursion, or(0, .Luse_50_vgpr.has_recursion)
-; GCN: .set .Lindirect_use_50_vgpr.has_indirect_call, or(0, .Luse_50_vgpr.has_indirect_call)
+; GCN: .set indirect_use_50_vgpr.num_vgpr, max(41, use_50_vgpr.num_vgpr)
+; GCN: .set indirect_use_50_vgpr.num_agpr, max(0, use_50_vgpr.num_agpr)
+; GCN: .set indirect_use_50_vgpr.numbered_sgpr, max(34, use_50_vgpr.numbered_sgpr)
+; GCN: .set indirect_use_50_vgpr.private_seg_size, 16+(max(use_50_vgpr.private_seg_size))
+; GCN: .set indirect_use_50_vgpr.uses_vcc, or(1, use_50_vgpr.uses_vcc)
+; GCN: .set indirect_use_50_vgpr.uses_flat_scratch, or(0, use_50_vgpr.uses_flat_scratch)
+; GCN: .set indirect_use_50_vgpr.has_dyn_sized_stack, or(0, use_50_vgpr.has_dyn_sized_stack)
+; GCN: .set indirect_use_50_vgpr.has_recursion, or(0, use_50_vgpr.has_recursion)
+; GCN: .set indirect_use_50_vgpr.has_indirect_call, or(0, use_50_vgpr.has_indirect_call)
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 50
; GCN: ScratchSize: 16
@@ -214,15 +202,15 @@ define void @indirect_use_50_vgpr() #0 {
}
; GCN-LABEL: {{^}}use_80_sgpr:
-; GCN: .set .Luse_80_sgpr.num_vgpr, 1
-; GCN: .set .Luse_80_sgpr.num_agpr, 0
-; GCN: .set .Luse_80_sgpr.numbered_sgpr, 80
-; GCN: .set .Luse_80_sgpr.private_seg_size, 8
-; GCN: .set .Luse_80_sgpr.uses_vcc, 0
-; GCN: .set .Luse_80_sgpr.uses_flat_scratch, 0
-; GCN: .set .Luse_80_sgpr.has_dyn_sized_stack, 0
-; GCN: .set .Luse_80_sgpr.has_recursion, 0
-; GCN: .set .Luse_80_sgpr.has_indirect_call, 0
+; GCN: .set use_80_sgpr.num_vgpr, 1
+; GCN: .set use_80_sgpr.num_agpr, 0
+; GCN: .set use_80_sgpr.numbered_sgpr, 80
+; GCN: .set use_80_sgpr.private_seg_size, 8
+; GCN: .set use_80_sgpr.uses_vcc, 0
+; GCN: .set use_80_sgpr.uses_flat_scratch, 0
+; GCN: .set use_80_sgpr.has_dyn_sized_stack, 0
+; GCN: .set use_80_sgpr.has_recursion, 0
+; GCN: .set use_80_sgpr.has_indirect_call, 0
; GCN: TotalNumSgprs: 84
; GCN: NumVgprs: 1
; GCN: ScratchSize: 8
@@ -232,15 +220,15 @@ define void @use_80_sgpr() #1 {
}
; GCN-LABEL: {{^}}indirect_use_80_sgpr:
-; GCN: .set .Lindirect_use_80_sgpr.num_vgpr, max(41, .Luse_80_sgpr.num_vgpr)
-; GCN: .set .Lindirect_use_80_sgpr.num_agpr, max(0, .Luse_80_sgpr.num_agpr)
-; GCN: .set .Lindirect_use_80_sgpr.numbered_sgpr, max(34, .Luse_80_sgpr.numbered_sgpr)
-; GCN: .set .Lindirect_use_80_sgpr.private_seg_size, 16+(max(.Luse_80_sgpr.private_seg_size))
-; GCN: .set .Lindirect_use_80_sgpr.uses_vcc, or(1, .Luse_80_sgpr.uses_vcc)
-; GCN: .set .Lindirect_use_80_sgpr.uses_flat_scratch, or(0, .Luse_80_sgpr.uses_flat_scratch)
-; GCN: .set .Lindirect_use_80_sgpr.has_dyn_sized_stack, or(0, .Luse_80_sgpr.has_dyn_sized_stack)
-; GCN: .set .Lindirect_use_80_sgpr.has_recursion, or(0, .Luse_80_sgpr.has_recursion)
-; GCN: .set .Lindirect_use_80_sgpr.has_indirect_call, or(0, .Luse_80_sgpr.has_indirect_call)
+; GCN: .set indirect_use_80_sgpr.num_vgpr, max(41, use_80_sgpr.num_vgpr)
+; GCN: .set indirect_use_80_sgpr.num_agpr, max(0, use_80_sgpr.num_agpr)
+; GCN: .set indirect_use_80_sgpr.numbered_sgpr, max(34, use_80_sgpr.numbered_sgpr)
+; GCN: .set indirect_use_80_sgpr.private_seg_size, 16+(max(use_80_sgpr.private_seg_size))
+; GCN: .set indirect_use_80_sgpr.uses_vcc, or(1, use_80_sgpr.uses_vcc)
+; GCN: .set indirect_use_80_sgpr.uses_flat_scratch, or(0, use_80_sgpr.uses_flat_scratch)
+; GCN: .set indirect_use_80_sgpr.has_dyn_sized_stack, or(0, use_80_sgpr.has_dyn_sized_stack)
+; GCN: .set indirect_use_80_sgpr.has_recursion, or(0, use_80_sgpr.has_recursion)
+; GCN: .set indirect_use_80_sgpr.has_indirect_call, or(0, use_80_sgpr.has_indirect_call)
; GCN: TotalNumSgprs: 84
; GCN: NumVgprs: 41
; GCN: ScratchSize: 24
@@ -250,15 +238,15 @@ define void @indirect_use_80_sgpr() #1 {
}
; GCN-LABEL: {{^}}indirect_2_level_use_80_sgpr:
-; GCN: .set .Lindirect_2_level_use_80_sgpr.num_vgpr, max(32, .Lindirect_use_80_sgpr.num_vgpr)
-; GCN: .set .Lindirect_2_level_use_80_sgpr.num_agpr, max(0, .Lindirect_use_80_sgpr.num_agpr)
-; GCN: .set .Lindirect_2_level_use_80_sgpr.numbered_sgpr, max(33, .Lindirect_use_80_sgpr.numbered_sgpr)
-; GCN: .set .Lindirect_2_level_use_80_sgpr.private_seg_size, 0+(max(.Lindirect_use_80_sgpr.private_seg_size))
-; GCN: .set .Lindirect_2_level_use_80_sgpr.uses_vcc, or(1, .Lindirect_use_80_sgpr.uses_vcc)
-; GCN: .set .Lindirect_2_level_use_80_sgpr.uses_flat_scratch, or(1, .Lindirect_use_80_sgpr.uses_flat_scratch)
-; GCN: .set .Lindirect_2_level_use_80_sgpr.has_dyn_sized_stack, or(0, .Lindirect_use_80_sgpr.has_dyn_sized_stack)
-; GCN: .set .Lindirect_2_level_use_80_sgpr.has_recursion, or(0, .Lindirect_use_80_sgpr.has_recursion)
-; GCN: .set .Lindirect_2_level_use_80_sgpr.has_indirect_call, or(0, .Lindirect_use_80_sgpr.has_indirect_call)
+; GCN: .set indirect_2_level_use_80_sgpr.num_vgpr, max(32, indirect_use_80_sgpr.num_vgpr)
+; GCN: .set indirect_2_level_use_80_sgpr.num_agpr, max(0, indirect_use_80_sgpr.num_agpr)
+; GCN: .set indirect_2_level_use_80_sgpr.numbered_sgpr, max(33, indirect_use_80_sgpr.numbered_sgpr)
+; GCN: .set indirect_2_level_use_80_sgpr.private_seg_size, 0+(max(indirect_use_80_sgpr.private_seg_size))
+; GCN: .set indirect_2_level_use_80_sgpr.uses_vcc, or(1, indirect_use_80_sgpr.uses_vcc)
+; GCN: .set indirect_2_level_use_80_sgpr.uses_flat_scratch, or(1, indirect_use_80_sgpr.uses_flat_scratch)
+; GCN: .set indirect_2_level_use_80_sgpr.has_dyn_sized_stack, or(0, indirect_use_80_sgpr.has_dyn_sized_stack)
+; GCN: .set indirect_2_level_use_80_sgpr.has_recursion, or(0, indirect_use_80_sgpr.has_recursion)
+; GCN: .set indirect_2_level_use_80_sgpr.has_indirect_call, or(0, indirect_use_80_sgpr.has_indirect_call)
; GCN: TotalNumSgprs: 86
; GCN: NumVgprs: 41
; GCN: ScratchSize: 24
@@ -268,15 +256,15 @@ define amdgpu_kernel void @indirect_2_level_use_80_sgpr() #0 {
}
; GCN-LABEL: {{^}}use_stack0:
-; GCN: .set .Luse_stack0.num_vgpr, 1
-; GCN: .set .Luse_stack0.num_agpr, 0
-; GCN: .set .Luse_stack0.numbered_sgpr, 33
-; GCN: .set .Luse_stack0.private_seg_size, 2052
-; GCN: .set .Luse_stack0.uses_vcc, 0
-; GCN: .set .Luse_stack0.uses_flat_scratch, 0
-; GCN: .set .Luse_stack0.has_dyn_sized_stack, 0
-; GCN: .set .Luse_stack0.has_recursion, 0
-; GCN: .set .Luse_stack0.has_indirect_call, 0
+; GCN: .set use_stack0.num_vgpr, 1
+; GCN: .set use_stack0.num_agpr, 0
+; GCN: .set use_stack0.numbered_sgpr, 33
+; GCN: .set use_stack0.private_seg_size, 2052
+; GCN: .set use_stack0.uses_vcc, 0
+; GCN: .set use_stack0.uses_flat_scratch, 0
+; GCN: .set use_stack0.has_dyn_sized_stack, 0
+; GCN: .set use_stack0.has_recursion, 0
+; GCN: .set use_stack0.has_indirect_call, 0
; GCN: TotalNumSgprs: 37
; GCN: NumVgprs: 1
; GCN: ScratchSize: 2052
@@ -287,15 +275,15 @@ define void @use_stack0() #1 {
}
; GCN-LABEL: {{^}}use_stack1:
-; GCN: .set .Luse_stack1.num_vgpr, 1
-; GCN: .set .Luse_stack1.num_agpr, 0
-; GCN: .set .Luse_stack1.numbered_sgpr, 33
-; GCN: .set .Luse_stack1.private_seg_size, 404
-; GCN: .set .Luse_stack1.uses_vcc, 0
-; GCN: .set .Luse_stack1.uses_flat_scratch, 0
-; GCN: .set .Luse_stack1.has_dyn_sized_stack, 0
-; GCN: .set .Luse_stack1.has_recursion, 0
-; GCN: .set .Luse_stack1.has_indirect_call, 0
+; GCN: .set use_stack1.num_vgpr, 1
+; GCN: .set use_stack1.num_agpr, 0
+; GCN: .set use_stack1.numbered_sgpr, 33
+; GCN: .set use_stack1.private_seg_size, 404
+; GCN: .set use_stack1.uses_vcc, 0
+; GCN: .set use_stack1.uses_flat_scratch, 0
+; GCN: .set use_stack1.has_dyn_sized_stack, 0
+; GCN: .set use_stack1.has_recursion, 0
+; GCN: .set use_stack1.has_indirect_call, 0
; GCN: TotalNumSgprs: 37
; GCN: NumVgprs: 1
; GCN: ScratchSize: 404
@@ -306,15 +294,15 @@ define void @use_stack1() #1 {
}
; GCN-LABEL: {{^}}indirect_use_stack:
-; GCN: .set .Lindirect_use_stack.num_vgpr, max(41, .Luse_stack0.num_vgpr)
-; GCN: .set .Lindirect_use_stack.num_agpr, max(0, .Luse_stack0.num_agpr)
-; GCN: .set .Lindirect_use_stack.numbered_sgpr, max(34, .Luse_stack0.numbered_sgpr)
-; GCN: .set .Lindirect_use_stack.private_seg_size, 80+(max(.Luse_stack0.private_seg_size))
-; GCN: .set .Lindirect_use_stack.uses_vcc, or(1, .Luse_stack0.uses_vcc)
-; GCN: .set .Lindirect_use_stack.uses_flat_scratch, or(0, .Luse_stack0.uses_flat_scratch)
-; GCN: .set .Lindirect_use_stack.has_dyn_sized_stack, or(0, .Luse_stack0.has_dyn_sized_stack)
-; GCN: .set .Lindirect_use_stack.has_recursion, or(0, .Luse_stack0.has_recursion)
-; GCN: .set .Lindirect_use_stack.has_indirect_call, or(0, .Luse_stack0.has_indirect_call)
+; GCN: .set indirect_use_stack.num_vgpr, max(41, use_stack0.num_vgpr)
+; GCN: .set indirect_use_stack.num_agpr, max(0, use_stack0.num_agpr)
+; GCN: .set indirect_use_stack.numbered_sgpr, max(34, use_stack0.numbered_sgpr)
+; GCN: .set indirect_use_stack.private_seg_size, 80+(max(use_stack0.private_seg_size))
+; GCN: .set indirect_use_stack.uses_vcc, or(1, use_stack0.uses_vcc)
+; GCN: .set indirect_use_stack.uses_flat_scratch, or(0, use_stack0.uses_flat_scratch)
+; GCN: .set indirect_use_stack.has_dyn_sized_stack, or(0, use_stack0.has_dyn_sized_stack)
+; GCN: .set indirect_use_stack.has_recursion, or(0, use_stack0.has_recursion)
+; GCN: .set indirect_use_stack.has_indirect_call, or(0, use_stack0.has_indirect_call)
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2132
@@ -326,15 +314,15 @@ define void @indirect_use_stack() #1 {
}
; GCN-LABEL: {{^}}indirect_2_level_use_stack:
-; GCN: .set .Lindirect_2_level_use_stack.num_vgpr, max(32, .Lindirect_use_stack.num_vgpr)
-; GCN: .set .Lindirect_2_level_use_stack.num_agpr, max(0, .Lindirect_use_stack.num_agpr)
-; GCN: .set .Lindirect_2_level_use_stack.numbered_sgpr, max(33, .Lindirect_use_stack.numbered_sgpr)
-; GCN: .set .Lindirect_2_level_use_stack.private_seg_size, 0+(max(.Lindirect_use_stack.private_seg_size))
-; GCN: .set .Lindirect_2_level_use_stack.uses_vcc, or(1, .Lindirect_use_stack.uses_vcc)
-; GCN: .set .Lindirect_2_level_use_stack.uses_flat_scratch, or(1, .Lindirect_use_stack.uses_flat_scratch)
-; GCN: .set .Lindirect_2_level_use_stack.has_dyn_sized_stack, or(0, .Lindirect_use_stack.has_dyn_sized_stack)
-; GCN: .set .Lindirect_2_level_use_stack.has_recursion, or(0, .Lindirect_use_stack.has_recursion)
-; GCN: .set .Lindirect_2_level_use_stack.has_indirect_call, or(0, .Lindirect_use_stack.has_indirect_call)
+; GCN: .set indirect_2_level_use_stack.num_vgpr, max(32, indirect_use_stack.num_vgpr)
+; GCN: .set indirect_2_level_use_stack.num_agpr, max(0, indirect_use_stack.num_agpr)
+; GCN: .set indirect_2_level_use_stack.numbered_sgpr, max(33, indirect_use_stack.numbered_sgpr)
+; GCN: .set indirect_2_level_use_stack.private_seg_size, 0+(max(indirect_use_stack.private_seg_size))
+; GCN: .set indirect_2_level_use_stack.uses_vcc, or(1, indirect_use_stack.uses_vcc)
+; GCN: .set indirect_2_level_use_stack.uses_flat_scratch, or(1, indirect_use_stack.uses_flat_scratch)
+; GCN: .set indirect_2_level_use_stack.has_dyn_sized_stack, or(0, indirect_use_stack.has_dyn_sized_stack)
+; GCN: .set indirect_2_level_use_stack.has_recursion, or(0, indirect_use_stack.has_recursion)
+; GCN: .set indirect_2_level_use_stack.has_indirect_call, or(0, indirect_use_stack.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2132
@@ -410,17 +398,17 @@ define amdgpu_kernel void @multi_call_with_external_and_duplicates() #0 {
}
; GCN-LABEL: {{^}}usage_external:
-; GCN: .set .Lusage_external.num_vgpr, max(32, .Lamdgpu.max_num_vgpr)
-; GCN: .set .Lusage_external.num_agpr, max(0, .Lamdgpu.max_num_agpr)
-; GCN: .set .Lusage_external.numbered_sgpr, max(33, .Lamdgpu.max_num_sgpr)
-; GCN: .set .Lusage_external.private_seg_size, 0
-; GCN: .set .Lusage_external.uses_vcc, 1
-; GCN: .set .Lusage_external.uses_flat_scratch, 1
-; GCN: .set .Lusage_external.has_dyn_sized_stack, 1
-; GCN: .set .Lusage_external.has_recursion, 0
-; GCN: .set .Lusage_external.has_indirect_call, 1
-; GCN: TotalNumSgprs: .Lusage_external.numbered_sgpr+6
-; GCN: NumVgprs: .Lusage_external.num_vgpr
+; GCN: .set usage_external.num_vgpr, max(32, amdgpu.max_num_vgpr)
+; GCN: .set usage_external.num_agpr, max(0, amdgpu.max_num_agpr)
+; GCN: .set usage_external.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
+; GCN: .set usage_external.private_seg_size, 0
+; GCN: .set usage_external.uses_vcc, 1
+; GCN: .set usage_external.uses_flat_scratch, 1
+; GCN: .set usage_external.has_dyn_sized_stack, 1
+; GCN: .set usage_external.has_recursion, 0
+; GCN: .set usage_external.has_indirect_call, 1
+; GCN: TotalNumSgprs: usage_external.numbered_sgpr+6
+; GCN: NumVgprs: usage_external.num_vgpr
; GCN: ScratchSize: 0
define amdgpu_kernel void @usage_external() #0 {
call void @external()
@@ -430,17 +418,17 @@ define amdgpu_kernel void @usage_external() #0 {
declare void @external_recurse() #2
; GCN-LABEL: {{^}}usage_external_recurse:
-; GCN: .set .Lusage_external_recurse.num_vgpr, max(32, .Lamdgpu.max_num_vgpr)
-; GCN: .set .Lusage_external_recurse.num_agpr, max(0, .Lamdgpu.max_num_agpr)
-; GCN: .set .Lusage_external_recurse.numbered_sgpr, max(33, .Lamdgpu.max_num_sgpr)
-; GCN: .set .Lusage_external_recurse.private_seg_size, 0
-; GCN: .set .Lusage_external_recurse.uses_vcc, 1
-; GCN: .set .Lusage_external_recurse.uses_flat_scratch, 1
-; GCN: .set .Lusage_external_recurse.has_dyn_sized_stack, 1
-; GCN: .set .Lusage_external_recurse.has_recursion, 1
-; GCN: .set .Lusage_external_recurse.has_indirect_call, 1
-; GCN: TotalNumSgprs: .Lusage_external_recurse.numbered_sgpr+6
-; GCN: NumVgprs: .Lusage_external_recurse.num_vgpr
+; GCN: .set usage_external_recurse.num_vgpr, max(32, amdgpu.max_num_vgpr)
+; GCN: .set usage_external_recurse.num_agpr, max(0, amdgpu.max_num_agpr)
+; GCN: .set usage_external_recurse.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
+; GCN: .set usage_external_recurse.private_seg_size, 0
+; GCN: .set usage_external_recurse.uses_vcc, 1
+; GCN: .set usage_external_recurse.uses_flat_scratch, 1
+; GCN: .set usage_external_recurse.has_dyn_sized_stack, 1
+; GCN: .set usage_external_recurse.has_recursion, 1
+; GCN: .set usage_external_recurse.has_indirect_call, 1
+; GCN: TotalNumSgprs: usage_external_recurse.numbered_sgpr+6
+; GCN: NumVgprs: usage_external_recurse.num_vgpr
; GCN: ScratchSize: 0
define amdgpu_kernel void @usage_external_recurse() #0 {
call void @external_recurse()
@@ -448,15 +436,15 @@ define amdgpu_kernel void @usage_external_recurse() #0 {
}
; GCN-LABEL: {{^}}direct_recursion_use_stack:
-; GCN: .set .Ldirect_recursion_use_stack.num_vgpr, 41
-; GCN: .set .Ldirect_recursion_use_stack.num_agpr, 0
-; GCN: .set .Ldirect_recursion_use_stack.numbered_sgpr, 36
-; GCN: .set .Ldirect_recursion_use_stack.private_seg_size, 2064
-; GCN: .set .Ldirect_recursion_use_stack.uses_vcc, 1
-; GCN: .set .Ldirect_recursion_use_stack.uses_flat_scratch, 0
-; GCN: .set .Ldirect_recursion_use_stack.has_dyn_sized_stack, 0
-; GCN: .set .Ldirect_recursion_use_stack.has_recursion, 1
-; GCN: .set .Ldirect_recursion_use_stack.has_indirect_call, 0
+; GCN: .set direct_recursion_use_stack.num_vgpr, 41
+; GCN: .set direct_recursion_use_stack.num_agpr, 0
+; GCN: .set direct_recursion_use_stack.numbered_sgpr, 36
+; GCN: .set direct_recursion_use_stack.private_seg_size, 2064
+; GCN: .set direct_recursion_use_stack.uses_vcc, 1
+; GCN: .set direct_recursion_use_stack.uses_flat_scratch, 0
+; GCN: .set direct_recursion_use_stack.has_dyn_sized_stack, 0
+; GCN: .set direct_recursion_use_stack.has_recursion, 1
+; GCN: .set direct_recursion_use_stack.has_indirect_call, 0
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2064
@@ -476,15 +464,15 @@ ret:
}
; GCN-LABEL: {{^}}usage_direct_recursion:
-; GCN: .set .Lusage_direct_recursion.num_vgpr, max(32, .Ldirect_recursion_use_stack.num_vgpr)
-; GCN: .set .Lusage_direct_recursion.num_agpr, max(0, .Ldirect_recursion_use_stack.num_agpr)
-; GCN: .set .Lusage_direct_recursion.numbered_sgpr, max(33, .Ldirect_recursion_use_stack.numbered_sgpr)
-; GCN: .set .Lusage_direct_recursion.private_seg_size, 0+(max(.Ldirect_recursion_use_stack.private_seg_size))
-; GCN: .set .Lusage_direct_recursion.uses_vcc, or(1, .Ldirect_recursion_use_stack.uses_vcc)
-; GCN: .set .Lusage_direct_recursion.uses_flat_scratch, or(1, .Ldirect_recursion_use_stack.uses_flat_scratch)
-; GCN: .set .Lusage_direct_recursion.has_dyn_sized_stack, or(0, .Ldirect_recursion_use_stack.has_dyn_sized_stack)
-; GCN: .set .Lusage_direct_recursion.has_recursion, or(1, .Ldirect_recursion_use_stack.has_recursion)
-; GCN: .set .Lusage_direct_recursion.has_indirect_call, or(0, .Ldirect_recursion_use_stack.has_indirect_call)
+; GCN: .set usage_direct_recursion.num_vgpr, max(32, direct_recursion_use_stack.num_vgpr)
+; GCN: .set usage_direct_recursion.num_agpr, max(0, direct_recursion_use_stack.num_agpr)
+; GCN: .set usage_direct_recursion.numbered_sgpr, max(33, direct_recursion_use_stack.numbered_sgpr)
+; GCN: .set usage_direct_recursion.private_seg_size, 0+(max(direct_recursion_use_stack.private_seg_size))
+; GCN: .set usage_direct_recursion.uses_vcc, or(1, direct_recursion_use_stack.uses_vcc)
+; GCN: .set usage_direct_recursion.uses_flat_scratch, or(1, direct_recursion_use_stack.uses_flat_scratch)
+; GCN: .set usage_direct_recursion.has_dyn_sized_stack, or(0, direct_recursion_use_stack.has_dyn_sized_stack)
+; GCN: .set usage_direct_recursion.has_recursion, or(1, direct_recursion_use_stack.has_recursion)
+; GCN: .set usage_direct_recursion.has_indirect_call, or(0, direct_recursion_use_stack.has_indirect_call)
; GCN: TotalNumSgprs: 42
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2064
@@ -625,17 +613,17 @@ define amdgpu_kernel void @multi_call_with_multi_stage_recurse(i32 %n) #0 {
; Make sure there's no assert when a sgpr96 is used.
; GCN-LABEL: {{^}}count_use_sgpr96_external_call
-; GCN: .set .Lcount_use_sgpr96_external_call.num_vgpr, max(32, .Lamdgpu.max_num_vgpr)
-; GCN: .set .Lcount_use_sgpr96_external_call.num_agpr, max(0, .Lamdgpu.max_num_agpr)
-; GCN: .set .Lcount_use_sgpr96_external_call.numbered_sgpr, max(33, .Lamdgpu.max_num_sgpr)
-; GCN: .set .Lcount_use_sgpr96_external_call.private_seg_size, 0
-; GCN: .set .Lcount_use_sgpr96_external_call.uses_vcc, 1
-; GCN: .set .Lcount_use_sgpr96_external_call.uses_flat_scratch, 1
-; GCN: .set .Lcount_use_sgpr96_external_call.has_dyn_sized_stack, 1
-; GCN: .set .Lcount_use_sgpr96_external_call.has_recursion, 0
-; GCN: .set .Lcount_use_sgpr96_external_call.has_indirect_call, 1
-; GCN: TotalNumSgprs: .Lcount_use_sgpr96_external_call.numbered_sgpr+6
-; GCN: NumVgprs: .Lcount_use_sgpr96_external_call.num_vgpr
+; GCN: .set count_use_sgpr96_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr)
+; GCN: .set count_use_sgpr96_external_call.num_agpr, max(0, amdgpu.max_num_agpr)
+; GCN: .set count_use_sgpr96_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
+; GCN: .set count_use_sgpr96_external_call.private_seg_size, 0
+; GCN: .set count_use_sgpr96_external_call.uses_vcc, 1
+; GCN: .set count_use_sgpr96_external_call.uses_flat_scratch, 1
+; GCN: .set count_use_sgpr96_external_call.has_dyn_sized_stack, 1
+; GCN: .set count_use_sgpr96_external_call.has_recursion, 0
+; GCN: .set count_use_sgpr96_external_call.has_indirect_call, 1
+; GCN: TotalNumSgprs: count_use_sgpr96_external_call.numbered_sgpr+6
+; GCN: NumVgprs: count_use_sgpr96_external_call.num_vgpr
; GCN: ScratchSize: 0
define amdgpu_kernel void @count_use_sgpr96_external_call() {
entry:
@@ -646,17 +634,17 @@ entry:
; Make sure there's no assert when a sgpr160 is used.
; GCN-LABEL: {{^}}count_use_sgpr160_external_call
-; GCN: .set .Lcount_use_sgpr160_external_call.num_vgpr, max(32, .Lamdgpu.max_num_vgpr)
-; GCN: .set .Lcount_use_sgpr160_external_call.num_agpr, max(0, .Lamdgpu.max_num_agpr)
-; GCN: .set .Lcount_use_sgpr160_external_call.numbered_sgpr, max(33, .Lamdgpu.max_num_sgpr)
-; GCN: .set .Lcount_use_sgpr160_external_call.private_seg_size, 0
-; GCN: .set .Lcount_use_sgpr160_external_call.uses_vcc, 1
-; GCN: .set .Lcount_use_sgpr160_external_call.uses_flat_scratch, 1
-; GCN: .set .Lcount_use_sgpr160_external_call.has_dyn_sized_stack, 1
-; GCN: .set .Lcount_use_sgpr160_external_call.has_recursion, 0
-; GCN: .set .Lcount_use_sgpr160_external_call.has_indirect_call, 1
-; GCN: TotalNumSgprs: .Lcount_use_sgpr160_external_call.numbered_sgpr+6
-; GCN: NumVgprs: .Lcount_use_sgpr160_external_call.num_vgpr
+; GCN: .set count_use_sgpr160_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr)
+; GCN: .set count_use_sgpr160_external_call.num_agpr, max(0, amdgpu.max_num_agpr)
+; GCN: .set count_use_sgpr160_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
+; GCN: .set count_use_sgpr160_external_call.private_seg_size, 0
+; GCN: .set count_use_sgpr160_external_call.uses_vcc, 1
+; GCN: .set count_use_sgpr160_external_call.uses_flat_scratch, 1
+; GCN: .set count_use_sgpr160_external_call.has_dyn_sized_stack, 1
+; GCN: .set count_use_sgpr160_external_call.has_recursion, 0
+; GCN: .set count_use_sgpr160_external_call.has_indirect_call, 1
+; GCN: TotalNumSgprs: count_use_sgpr160_external_call.numbered_sgpr+6
+; GCN: NumVgprs: count_use_sgpr160_external_call.num_vgpr
; GCN: ScratchSize: 0
define amdgpu_kernel void @count_use_sgpr160_external_call() {
entry:
@@ -667,17 +655,17 @@ entry:
; Make sure there's no assert when a vgpr160 is used.
; GCN-LABEL: {{^}}count_use_vgpr160_external_call
-; GCN: .set .Lcount_use_vgpr160_external_call.num_vgpr, max(32, .Lamdgpu.max_num_vgpr)
-; GCN: .set .Lcount_use_vgpr160_external_call.num_agpr, max(0, .Lamdgpu.max_num_agpr)
-; GCN: .set .Lcount_use_vgpr160_external_call.numbered_sgpr, max(33, .Lamdgpu.max_num_sgpr)
-; GCN: .set .Lcount_use_vgpr160_external_call.private_seg_size, 0
-; GCN: .set .Lcount_use_vgpr160_external_call.uses_vcc, 1
-; GCN: .set .Lcount_use_vgpr160_external_call.uses_flat_scratch, 1
-; GCN: .set .Lcount_use_vgpr160_external_call.has_dyn_sized_stack, 1
-; GCN: .set .Lcount_use_vgpr160_external_call.has_recursion, 0
-; GCN: .set .Lcount_use_vgpr160_external_call.has_indirect_call, 1
-; GCN: TotalNumSgprs: .Lcount_use_vgpr160_external_call.numbered_sgpr+6
-; GCN: NumVgprs: .Lcount_use_vgpr160_external_call.num_vgpr
+; GCN: .set count_use_vgpr160_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr)
+; GCN: .set count_use_vgpr160_external_call.num_agpr, max(0, amdgpu.max_num_agpr)
+; GCN: .set count_use_vgpr160_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
+; GCN: .set count_use_vgpr160_external_call.private_seg_size, 0
+; GCN: .set count_use_vgpr160_external_call.uses_vcc, 1
+; GCN: .set count_use_vgpr160_external_call.uses_flat_scratch, 1
+; GCN: .set count_use_vgpr160_external_call.has_dyn_sized_stack, 1
+; GCN: .set count_use_vgpr160_external_call.has_recursion, 0
+; GCN: .set count_use_vgpr160_external_call.has_indirect_call, 1
+; GCN: TotalNumSgprs: count_use_vgpr160_external_call.numbered_sgpr+6
+; GCN: NumVgprs: count_use_vgpr160_external_call.num_vgpr
; GCN: ScratchSize: 0
define amdgpu_kernel void @count_use_vgpr160_external_call() {
entry:
@@ -687,9 +675,9 @@ entry:
}
; Added at the end of the .s are the module level maximums
-; GCN: .set .Lamdgpu.max_num_vgpr, 50
-; GCN: .set .Lamdgpu.max_num_agpr, 0
-; GCN: .set .Lamdgpu.max_num_sgpr, 80
+; GCN: .set amdgpu.max_num_vgpr, 50
+; GCN: .set amdgpu.max_num_agpr, 0
+; GCN: .set amdgpu.max_num_sgpr, 80
attributes #0 = { nounwind noinline norecurse }
attributes #1 = { nounwind noinline norecurse }
diff --git a/llvm/test/CodeGen/AMDGPU/recursion.ll b/llvm/test/CodeGen/AMDGPU/recursion.ll
index c19029275329da..c0d228e1254e64 100644
--- a/llvm/test/CodeGen/AMDGPU/recursion.ll
+++ b/llvm/test/CodeGen/AMDGPU/recursion.ll
@@ -3,11 +3,11 @@
; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=V5 %s
; CHECK-LABEL: {{^}}recursive:
-; CHECK: .set .Lrecursive.private_seg_size, 16+(max(16384))
+; CHECK: .set recursive.private_seg_size, 16+(max(16384))
; CHECK: ScratchSize: 16
; V5-LABEL: {{^}}recursive:
-; V5: .set .Lrecursive.has_recursion, 1
+; V5: .set recursive.has_recursion, 1
define void @recursive() {
call void @recursive()
store volatile i32 0, ptr addrspace(1) undef
@@ -15,22 +15,22 @@ define void @recursive() {
}
; CHECK-LABEL: {{^}}tail_recursive:
-; CHECK: .set .Ltail_recursive.private_seg_size, 0
+; CHECK: .set tail_recursive.private_seg_size, 0
; CHECK: ScratchSize: 0
define void @tail_recursive() {
tail call void @tail_recursive()
ret void
}
-; CHECK: .set .Lcalls_tail_recursive.private_seg_size, 0+(max(.Ltail_recursive.private_seg_size))
+; CHECK: .set calls_tail_recursive.private_seg_size, 0+(max(tail_recursive.private_seg_size))
define void @calls_tail_recursive() norecurse {
tail call void @tail_recursive()
ret void
}
; CHECK-LABEL: {{^}}tail_recursive_with_stack:
-; CHECK: .set .Ltail_recursive_with_stack.private_seg_size, 8
-; CHECK: .set .Ltail_recursive_with_stack.has_recursion, 1
+; CHECK: .set tail_recursive_with_stack.private_seg_size, 8
+; CHECK: .set tail_recursive_with_stack.has_recursion, 1
define void @tail_recursive_with_stack() {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
@@ -41,11 +41,11 @@ define void @tail_recursive_with_stack() {
; For an arbitrary recursive call, report a large number for unknown stack
; usage for code object v4 and older
; CHECK-LABEL: {{^}}calls_recursive:
-; CHECK: .set .Lcalls_recursive.private_seg_size, 0+(max(16384, .Lrecursive.private_seg_size))
+; CHECK: .set calls_recursive.private_seg_size, 0+(max(16384, recursive.private_seg_size))
;
; V5-LABEL: {{^}}calls_recursive:
-; V5: .set .Lcalls_recursive.private_seg_size, 0+(max(.Lrecursive.private_seg_size))
-; V5: .set .Lcalls_recursive.has_dyn_sized_stack, or(0, .Lrecursive.has_dyn_sized_stack)
+; V5: .set calls_recursive.private_seg_size, 0+(max(recursive.private_seg_size))
+; V5: .set calls_recursive.has_dyn_sized_stack, or(0, recursive.has_dyn_sized_stack)
define amdgpu_kernel void @calls_recursive() {
call void @recursive()
ret void
@@ -54,7 +54,7 @@ define amdgpu_kernel void @calls_recursive() {
; Make sure we do not report a huge stack size for tail recursive
; functions
; CHECK-LABEL: {{^}}kernel_indirectly_calls_tail_recursive:
-; CHECK: .set .Lkernel_indirectly_calls_tail_recursive.private_seg_size, 0+(max(.Lcalls_tail_recursive.private_seg_size))
+; CHECK: .set kernel_indirectly_calls_tail_recursive.private_seg_size, 0+(max(calls_tail_recursive.private_seg_size))
define amdgpu_kernel void @kernel_indirectly_calls_tail_recursive() {
call void @calls_tail_recursive()
ret void
@@ -65,22 +65,22 @@ define amdgpu_kernel void @kernel_indirectly_calls_tail_recursive() {
; in the kernel.
; CHECK-LABEL: {{^}}kernel_calls_tail_recursive:
-; CHECK: .set .Lkernel_calls_tail_recursive.private_seg_size, 0+(max(16384, .Ltail_recursive.private_seg_size))
+; CHECK: .set kernel_calls_tail_recursive.private_seg_size, 0+(max(16384, tail_recursive.private_seg_size))
;
; V5-LABEL: {{^}}kernel_calls_tail_recursive:
-; V5: .set .Lkernel_calls_tail_recursive.private_seg_size, 0+(max(.Ltail_recursive.private_seg_size))
-; V5: .set .Lkernel_calls_tail_recursive.has_recursion, or(1, .Ltail_recursive.has_recursion)
+; V5: .set kernel_calls_tail_recursive.private_seg_size, 0+(max(tail_recursive.private_seg_size))
+; V5: .set kernel_calls_tail_recursive.has_recursion, or(1, tail_recursive.has_recursion)
define amdgpu_kernel void @kernel_calls_tail_recursive() {
call void @tail_recursive()
ret void
}
; CHECK-LABEL: {{^}}kernel_calls_tail_recursive_with_stack:
-; CHECK: .set .Lkernel_calls_tail_recursive_with_stack.private_seg_size, 0+(max(16384, .Ltail_recursive_with_stack.private_seg_size))
+; CHECK: .set kernel_calls_tail_recursive_with_stack.private_seg_size, 0+(max(16384, tail_recursive_with_stack.private_seg_size))
;
; V5-LABEL: {{^}}kernel_calls_tail_recursive_with_stack:
-; V5: .set .Lkernel_calls_tail_recursive_with_stack.private_seg_size, 0+(max(.Ltail_recursive_with_stack.private_seg_size))
-; V5: .set .Lkernel_calls_tail_recursive_with_stack.has_dyn_sized_stack, or(0, .Ltail_recursive_with_stack.has_dyn_sized_stack)
+; V5: .set kernel_calls_tail_recursive_with_stack.private_seg_size, 0+(max(tail_recursive_with_stack.private_seg_size))
+; V5: .set kernel_calls_tail_recursive_with_stack.has_dyn_sized_stack, or(0, tail_recursive_with_stack.has_dyn_sized_stack)
define amdgpu_kernel void @kernel_calls_tail_recursive_with_stack() {
call void @tail_recursive_with_stack()
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
index 849b1e2a7fce43..8bbae59f468f1d 100644
--- a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
+++ b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
@@ -141,12 +141,12 @@ define void @empty_func() !dbg !8 {
}
; STDERR: remark: foo.cl:64:0: Function Name: test_indirect_call
-; STDERR-NEXT: remark: foo.cl:64:0: TotalSGPRs: .Ltest_indirect_call.numbered_sgpr+6
-; STDERR-NEXT: remark: foo.cl:64:0: VGPRs: .Ltest_indirect_call.num_vgpr
-; STDERR-NEXT: remark: foo.cl:64:0: AGPRs: .Ltest_indirect_call.num_agpr
+; STDERR-NEXT: remark: foo.cl:64:0: TotalSGPRs: test_indirect_call.numbered_sgpr+6
+; STDERR-NEXT: remark: foo.cl:64:0: VGPRs: test_indirect_call.num_vgpr
+; STDERR-NEXT: remark: foo.cl:64:0: AGPRs: test_indirect_call.num_agpr
; STDERR-NEXT: remark: foo.cl:64:0: ScratchSize [bytes/lane]: 0
; STDERR-NEXT: remark: foo.cl:64:0: Dynamic Stack: True
-; STDERR-NEXT: remark: foo.cl:64:0: Occupancy [waves/SIMD]: occupancy(10, 4, 256, 8, 8, max(.Ltest_indirect_call.numbered_sgpr+(extrasgprs(.Ltest_indirect_call.uses_vcc, .Ltest_indirect_call.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(.Ltest_indirect_call.num_agpr, .Ltest_indirect_call.num_vgpr), 1, 0))
+; STDERR-NEXT: remark: foo.cl:64:0: Occupancy [waves/SIMD]: occupancy(10, 4, 256, 8, 8, max(test_indirect_call.numbered_sgpr+(extrasgprs(test_indirect_call.uses_vcc, test_indirect_call.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(test_indirect_call.num_agpr, test_indirect_call.num_vgpr), 1, 0))
; STDERR-NEXT: remark: foo.cl:64:0: SGPRs Spill: 0
; STDERR-NEXT: remark: foo.cl:64:0: VGPRs Spill: 0
; STDERR-NEXT: remark: foo.cl:64:0: LDS Size [bytes/block]: 0
@@ -159,12 +159,12 @@ define amdgpu_kernel void @test_indirect_call() !dbg !9 {
}
; STDERR: remark: foo.cl:74:0: Function Name: test_indirect_w_static_stack
-; STDERR-NEXT: remark: foo.cl:74:0: TotalSGPRs: .Ltest_indirect_w_static_stack.numbered_sgpr+6
-; STDERR-NEXT: remark: foo.cl:74:0: VGPRs: .Ltest_indirect_w_static_stack.num_vgpr
-; STDERR-NEXT: remark: foo.cl:74:0: AGPRs: .Ltest_indirect_w_static_stack.num_agpr
+; STDERR-NEXT: remark: foo.cl:74:0: TotalSGPRs: test_indirect_w_static_stack.numbered_sgpr+6
+; STDERR-NEXT: remark: foo.cl:74:0: VGPRs: test_indirect_w_static_stack.num_vgpr
+; STDERR-NEXT: remark: foo.cl:74:0: AGPRs: test_indirect_w_static_stack.num_agpr
; STDERR-NEXT: remark: foo.cl:74:0: ScratchSize [bytes/lane]: 144
; STDERR-NEXT: remark: foo.cl:74:0: Dynamic Stack: True
-; STDERR-NEXT: remark: foo.cl:74:0: Occupancy [waves/SIMD]: occupancy(10, 4, 256, 8, 8, max(.Ltest_indirect_w_static_stack.numbered_sgpr+(extrasgprs(.Ltest_indirect_w_static_stack.uses_vcc, .Ltest_indirect_w_static_stack.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(.Ltest_indirect_w_static_stack.num_agpr, .Ltest_indirect_w_static_stack.num_vgpr), 1, 0))
+; STDERR-NEXT: remark: foo.cl:74:0: Occupancy [waves/SIMD]: occupancy(10, 4, 256, 8, 8, max(test_indirect_w_static_stack.numbered_sgpr+(extrasgprs(test_indirect_w_static_stack.uses_vcc, test_indirect_w_static_stack.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(test_indirect_w_static_stack.num_agpr, test_indirect_w_static_stack.num_vgpr), 1, 0))
; STDERR-NEXT: remark: foo.cl:74:0: SGPRs Spill: 0
; STDERR-NEXT: remark: foo.cl:74:0: VGPRs Spill: 0
; STDERR-NEXT: remark: foo.cl:74:0: LDS Size [bytes/block]: 0
diff --git a/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll b/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll
index bd7b473fd806f2..5d5aad76afd095 100644
--- a/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll
+++ b/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll
@@ -23,8 +23,8 @@ define internal fastcc void @unreachable() {
; GCN-NOT: .amdhsa_uses_dynamic_stack
; GCN-V5: .amdhsa_uses_dynamic_stack
-; ALL: .set .Lentry.private_seg_size, 0
-; ALL: .set .Lentry.has_dyn_sized_stack, 0
+; ALL: .set entry.private_seg_size, 0
+; ALL: .set entry.has_dyn_sized_stack, 0
define amdgpu_kernel void @entry() {
bb0:
br i1 false, label %bb1, label %bb2
More information about the llvm-commits
mailing list