[llvm] db646de - [AMDGPU] Set optional PAL metadata
Sebastian Neubauer via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 6 02:59:46 PDT 2021
Author: Sebastian Neubauer
Date: 2021-07-06T11:58:00+02:00
New Revision: db646de3ee0181c93744b69cb51baeff17d70a00
URL: https://github.com/llvm/llvm-project/commit/db646de3ee0181c93744b69cb51baeff17d70a00
DIFF: https://github.com/llvm/llvm-project/commit/db646de3ee0181c93744b69cb51baeff17d70a00.diff
LOG: [AMDGPU] Set optional PAL metadata
Set informational fields in the .shader_functions table.
Also correct the documentation: .scratch_memory_size and .lds_size are
integers, not sequences of 2 integers.
Differential Revision: https://reviews.llvm.org/D105116
Added:
Modified:
llvm/docs/AMDGPUUsage.rst
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
Removed:
################################################################################
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index e63af4a5e8206..ce7eb31e4634e 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -11251,10 +11251,8 @@ within a map that has been added by the same *vendor-name*.
".api_shader_hash" sequence of Input shader hash, typically passed in from the client. The value
2 integers is implementation defined, and can not be relied on between
different builds of the compiler.
- ".scratch_memory_size" sequence of Size in bytes of scratch memory used by the shader.
- 2 integers
- ".lds_size" sequence of Size in bytes of LDS memory.
- 2 integers
+ ".scratch_memory_size" integer Size in bytes of scratch memory used by the shader.
+ ".lds_size" integer Size in bytes of LDS memory.
".vgpr_count" integer Number of VGPRs used by the shader.
".sgpr_count" integer Number of SGPRs used by the shader.
".stack_frame_size_in_bytes" integer Amount of stack size used by the shader.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 3a36c1d123ec0..c36b1045c169b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -1395,10 +1395,16 @@ void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
auto *MD = getTargetStreamer()->getPALMetadata();
const MachineFrameInfo &MFI = MF.getFrameInfo();
MD->setFunctionScratchSize(MF, MFI.getStackSize());
+
// Set compute registers
MD->setRsrc1(CallingConv::AMDGPU_CS,
CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS));
MD->setRsrc2(CallingConv::AMDGPU_CS, CurrentProgramInfo.ComputePGMRSrc2);
+
+ // Set optional info
+ MD->setFunctionLdsSize(MF, CurrentProgramInfo.LDSSize);
+ MD->setFunctionNumUsedVgprs(MF, CurrentProgramInfo.NumVGPRsForWavesPerEU);
+ MD->setFunctionNumUsedSgprs(MF, CurrentProgramInfo.NumSGPRsForWavesPerEU);
}
// This is supposed to be log2(Size)
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
index c876be31c3529..f6b5975f19347 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
@@ -243,6 +243,27 @@ void AMDGPUPALMetadata::setFunctionScratchSize(const MachineFunction &MF,
Node[".stack_frame_size_in_bytes"] = MsgPackDoc.getNode(Val);
}
+// Set the amount of LDS used in bytes in the metadata.
+void AMDGPUPALMetadata::setFunctionLdsSize(const MachineFunction &MF,
+ unsigned Val) {
+ auto Node = getShaderFunction(MF.getFunction().getName());
+ Node[".lds_size"] = MsgPackDoc.getNode(Val);
+}
+
+// Set the number of used vgprs in the metadata.
+void AMDGPUPALMetadata::setFunctionNumUsedVgprs(const MachineFunction &MF,
+ unsigned Val) {
+ auto Node = getShaderFunction(MF.getFunction().getName());
+ Node[".vgpr_count"] = MsgPackDoc.getNode(Val);
+}
+
+// Set the number of used sgprs in the metadata.
+void AMDGPUPALMetadata::setFunctionNumUsedSgprs(const MachineFunction &MF,
+ unsigned Val) {
+ auto Node = getShaderFunction(MF.getFunction().getName());
+ Node[".sgpr_count"] = MsgPackDoc.getNode(Val);
+}
+
// Set the hardware register bit in PAL metadata to enable wave32 on the
// shader of the given calling convention.
void AMDGPUPALMetadata::setWave32(unsigned CC) {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
index 0c272da1a55bd..7fdd9a8429c15 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
@@ -80,6 +80,21 @@ class AMDGPUPALMetadata {
// Set the stack frame size of a function in the metadata.
void setFunctionScratchSize(const MachineFunction &MF, unsigned Val);
+ // Set the amount of LDS used in bytes in the metadata. This is an optional
+ // advisory record for logging etc; wave dispatch actually uses the rsrc1
+ // register for the shader stage to determine the amount of LDS to allocate.
+ void setFunctionLdsSize(const MachineFunction &MF, unsigned Val);
+
+ // Set the number of used vgprs in the metadata. This is an optional advisory
+ // record for logging etc; wave dispatch actually uses the rsrc1 register for
+ // the shader stage to determine the number of vgprs to allocate.
+ void setFunctionNumUsedVgprs(const MachineFunction &MF, unsigned Val);
+
+ // Set the number of used sgprs in the metadata. This is an optional advisory
+ // record for logging etc; wave dispatch actually uses the rsrc1 register for
+ // the shader stage to determine the number of sgprs to allocate.
+ void setFunctionNumUsedSgprs(const MachineFunction &MF, unsigned Val);
+
// Set the hardware register bit in PAL metadata to enable wave32 on the
// shader of the given calling convention.
void setWave32(unsigned CC);
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
index 7ecc73406e1eb..4315bb4a10c6f 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
@@ -1,9 +1,9 @@
-; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
-; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL -enable-var-scope %s
+; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX8 -enable-var-scope %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s
+; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL,GFX9 -enable-var-scope %s
; Make sure this interacts well with -amdgpu-fixed-function-abi
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -amdgpu-fixed-function-abi -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -amdgpu-fixed-function-abi -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s
declare float @extern_func(float) #0
declare float @extern_func_many_args(<64 x float>) #0
@@ -147,40 +147,92 @@ attributes #0 = { nounwind }
; GCN: amdpal.pipelines:
; GCN-NEXT: - .registers:
-; SDAG-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf03cf{{$}}
-; SDAG-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
-; GISEL-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf03cf{{$}}
-; GISEL-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
+; GCN-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf03cf{{$}}
+; GCN-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
; GCN-NEXT: .shader_functions:
; GCN-NEXT: dynamic_stack:
+; GCN-NEXT: .lds_size: 0{{$}}
+; GCN-NEXT: .sgpr_count: 0x24{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
+; SDAG-NEXT: .vgpr_count: 0x2{{$}}
+; GISEL-NEXT: .vgpr_count: 0x3{{$}}
; GCN-NEXT: dynamic_stack_loop:
+; GCN-NEXT: .lds_size: 0{{$}}
+; SDAG-NEXT: .sgpr_count: 0x22{{$}}
+; GISEL-NEXT: .sgpr_count: 0x24{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
+; SDAG-NEXT: .vgpr_count: 0x3{{$}}
+; GISEL-NEXT: .vgpr_count: 0x4{{$}}
; GCN-NEXT: multiple_stack:
+; GCN-NEXT: .lds_size: 0{{$}}
+; GCN-NEXT: .sgpr_count: 0x21{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x24{{$}}
+; GCN-NEXT: .vgpr_count: 0x3{{$}}
; GCN-NEXT: no_stack:
+; GCN-NEXT: .lds_size: 0{{$}}
+; GCN-NEXT: .sgpr_count: 0x20{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
+; GCN-NEXT: .vgpr_count: 0x1{{$}}
; GCN-NEXT: no_stack_call:
+; GCN-NEXT: .lds_size: 0{{$}}
+; GCN-NEXT: .sgpr_count: 0x20{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
+; GCN-NEXT: .vgpr_count: 0x1{{$}}
; GCN-NEXT: no_stack_extern_call:
+; GCN-NEXT: .lds_size: 0{{$}}
+; GFX8-NEXT: .sgpr_count: 0x68{{$}}
+; GFX9-NEXT: .sgpr_count: 0x66{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
+; GCN-NEXT: .vgpr_count: 0x40{{$}}
; GCN-NEXT: no_stack_extern_call_many_args:
-; SDAG-NEXT: .stack_frame_size_in_bytes: 0x90{{$}}
-; GISEL-NEXT: .stack_frame_size_in_bytes: 0x90{{$}}
+; GCN-NEXT: .lds_size: 0{{$}}
+; GFX8-NEXT: .sgpr_count: 0x68{{$}}
+; GFX9-NEXT: .sgpr_count: 0x66{{$}}
+; GCN-NEXT: .stack_frame_size_in_bytes: 0x90{{$}}
+; GCN-NEXT: .vgpr_count: 0x40{{$}}
; GCN-NEXT: no_stack_indirect_call:
+; GCN-NEXT: .lds_size: 0{{$}}
+; GFX8-NEXT: .sgpr_count: 0x68{{$}}
+; GFX9-NEXT: .sgpr_count: 0x66{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
+; GCN-NEXT: .vgpr_count: 0x40{{$}}
; GCN-NEXT: simple_lds:
+; GCN-NEXT: .lds_size: 0x100{{$}}
+; GCN-NEXT: .sgpr_count: 0x20{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
+; GCN-NEXT: .vgpr_count: 0x1{{$}}
; GCN-NEXT: simple_lds_recurse:
+; GCN-NEXT: .lds_size: 0x100{{$}}
+; GFX8-NEXT: .sgpr_count: 0x68{{$}}
+; GFX9-NEXT: .sgpr_count: 0x66{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
+; GCN-NEXT: .vgpr_count: 0x40{{$}}
; GCN-NEXT: simple_stack:
+; GCN-NEXT: .lds_size: 0{{$}}
+; GCN-NEXT: .sgpr_count: 0x21{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x14{{$}}
+; GCN-NEXT: .vgpr_count: 0x2{{$}}
; GCN-NEXT: simple_stack_call:
+; GCN-NEXT: .lds_size: 0{{$}}
+; GCN-NEXT: .sgpr_count: 0x22{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
+; GCN-NEXT: .vgpr_count: 0x3{{$}}
; GCN-NEXT: simple_stack_extern_call:
+; GCN-NEXT: .lds_size: 0{{$}}
+; GFX8-NEXT: .sgpr_count: 0x68{{$}}
+; GFX9-NEXT: .sgpr_count: 0x66{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
+; GCN-NEXT: .vgpr_count: 0x40{{$}}
; GCN-NEXT: simple_stack_indirect_call:
+; GCN-NEXT: .lds_size: 0{{$}}
+; GFX8-NEXT: .sgpr_count: 0x68{{$}}
+; GFX9-NEXT: .sgpr_count: 0x66{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
+; GCN-NEXT: .vgpr_count: 0x40{{$}}
; GCN-NEXT: simple_stack_recurse:
+; GCN-NEXT: .lds_size: 0{{$}}
+; GFX8-NEXT: .sgpr_count: 0x68{{$}}
+; GFX9-NEXT: .sgpr_count: 0x66{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
+; GCN-NEXT: .vgpr_count: 0x40{{$}}
; GCN-NEXT: ...
More information about the llvm-commits
mailing list