[llvm] db646de - [AMDGPU] Set optional PAL metadata

Tue Jul 6 02:59:46 PDT 2021

Author: Sebastian Neubauer
Date: 2021-07-06T11:58:00+02:00
New Revision: db646de3ee0181c93744b69cb51baeff17d70a00

URL: https://github.com/llvm/llvm-project/commit/db646de3ee0181c93744b69cb51baeff17d70a00
DIFF: https://github.com/llvm/llvm-project/commit/db646de3ee0181c93744b69cb51baeff17d70a00.diff

LOG: [AMDGPU] Set optional PAL metadata

Set informational fields in the .shader_functions table.

Also correct the documentation, .scratch_memory_size and .lds_size are
integers.

Differential Revision: https://reviews.llvm.org/D105116

Added: 
    

Modified: 
    llvm/docs/AMDGPUUsage.rst
    llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
    llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
    llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
    llvm/test/CodeGen/AMDGPU/amdpal-callable.ll

Removed: 
    


################################################################################
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index e63af4a5e8206..ce7eb31e4634e 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -11251,10 +11251,8 @@ within a map that has been added by the same *vendor-name*.
      ".api_shader_hash"            sequence of    Input shader hash, typically passed in from the client. The value
                                    2 integers     is implementation defined, and can not be relied on between
                                                   different builds of the compiler.
-     ".scratch_memory_size"        sequence of    Size in bytes of scratch memory used by the shader.
-                                   2 integers
-     ".lds_size"                   sequence of    Size in bytes of LDS memory.
-                                   2 integers
+     ".scratch_memory_size"        integer        Size in bytes of scratch memory used by the shader.
+     ".lds_size"                   integer        Size in bytes of LDS memory.
      ".vgpr_count"                 integer        Number of VGPRs used by the shader.
      ".sgpr_count"                 integer        Number of SGPRs used by the shader.
      ".stack_frame_size_in_bytes"  integer        Amount of stack size used by the shader.

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 3a36c1d123ec0..c36b1045c169b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -1395,10 +1395,16 @@ void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
   auto *MD = getTargetStreamer()->getPALMetadata();
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   MD->setFunctionScratchSize(MF, MFI.getStackSize());
+
   // Set compute registers
   MD->setRsrc1(CallingConv::AMDGPU_CS,
                CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS));
   MD->setRsrc2(CallingConv::AMDGPU_CS, CurrentProgramInfo.ComputePGMRSrc2);
+
+  // Set optional info
+  MD->setFunctionLdsSize(MF, CurrentProgramInfo.LDSSize);
+  MD->setFunctionNumUsedVgprs(MF, CurrentProgramInfo.NumVGPRsForWavesPerEU);
+  MD->setFunctionNumUsedSgprs(MF, CurrentProgramInfo.NumSGPRsForWavesPerEU);
 }
 
 // This is supposed to be log2(Size)

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
index c876be31c3529..f6b5975f19347 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
@@ -243,6 +243,27 @@ void AMDGPUPALMetadata::setFunctionScratchSize(const MachineFunction &MF,
   Node[".stack_frame_size_in_bytes"] = MsgPackDoc.getNode(Val);
 }
 
+// Set the amount of LDS used in bytes in the metadata.
+void AMDGPUPALMetadata::setFunctionLdsSize(const MachineFunction &MF,
+                                           unsigned Val) {
+  auto Node = getShaderFunction(MF.getFunction().getName());
+  Node[".lds_size"] = MsgPackDoc.getNode(Val);
+}
+
+// Set the number of used vgprs in the metadata.
+void AMDGPUPALMetadata::setFunctionNumUsedVgprs(const MachineFunction &MF,
+                                                unsigned Val) {
+  auto Node = getShaderFunction(MF.getFunction().getName());
+  Node[".vgpr_count"] = MsgPackDoc.getNode(Val);
+}
+
+// Set the number of used sgprs in the metadata.
+void AMDGPUPALMetadata::setFunctionNumUsedSgprs(const MachineFunction &MF,
+                                                unsigned Val) {
+  auto Node = getShaderFunction(MF.getFunction().getName());
+  Node[".sgpr_count"] = MsgPackDoc.getNode(Val);
+}
+
 // Set the hardware register bit in PAL metadata to enable wave32 on the
 // shader of the given calling convention.
 void AMDGPUPALMetadata::setWave32(unsigned CC) {

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
index 0c272da1a55bd..7fdd9a8429c15 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
@@ -80,6 +80,21 @@ class AMDGPUPALMetadata {
   // Set the stack frame size of a function in the metadata.
   void setFunctionScratchSize(const MachineFunction &MF, unsigned Val);
 
+  // Set the amount of LDS used in bytes in the metadata. This is an optional
+  // advisory record for logging etc; wave dispatch actually uses the rsrc1
+  // register for the shader stage to determine the amount of LDS to allocate.
+  void setFunctionLdsSize(const MachineFunction &MF, unsigned Val);
+
+  // Set the number of used vgprs in the metadata. This is an optional advisory
+  // record for logging etc; wave dispatch actually uses the rsrc1 register for
+  // the shader stage to determine the number of vgprs to allocate.
+  void setFunctionNumUsedVgprs(const MachineFunction &MF, unsigned Val);
+
+  // Set the number of used sgprs in the metadata. This is an optional advisory
+  // record for logging etc; wave dispatch actually uses the rsrc1 register for
+  // the shader stage to determine the number of sgprs to allocate.
+  void setFunctionNumUsedSgprs(const MachineFunction &MF, unsigned Val);
+
   // Set the hardware register bit in PAL metadata to enable wave32 on the
   // shader of the given calling convention.
   void setWave32(unsigned CC);

diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
index 7ecc73406e1eb..4315bb4a10c6f 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
@@ -1,9 +1,9 @@
-; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
-; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL -enable-var-scope %s
+; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX8 -enable-var-scope %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s
+; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL,GFX9 -enable-var-scope %s
 
 ; Make sure this interacts well with -amdgpu-fixed-function-abi
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -amdgpu-fixed-function-abi -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -amdgpu-fixed-function-abi -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s
 
 declare float @extern_func(float) #0
 declare float @extern_func_many_args(<64 x float>) #0
@@ -147,40 +147,92 @@ attributes #0 = { nounwind }
 
 ; GCN: amdpal.pipelines:
 ; GCN-NEXT:  - .registers:
-; SDAG-NEXT:      0x2e12 (COMPUTE_PGM_RSRC1): 0xaf03cf{{$}}
-; SDAG-NEXT:      0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
-; GISEL-NEXT:      0x2e12 (COMPUTE_PGM_RSRC1): 0xaf03cf{{$}}
-; GISEL-NEXT:      0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
+; GCN-NEXT:      0x2e12 (COMPUTE_PGM_RSRC1): 0xaf03cf{{$}}
+; GCN-NEXT:      0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
 ; GCN-NEXT:    .shader_functions:
 ; GCN-NEXT:      dynamic_stack:
+; GCN-NEXT:        .lds_size:       0{{$}}
+; GCN-NEXT:        .sgpr_count:     0x24{{$}}
 ; GCN-NEXT:        .stack_frame_size_in_bytes: 0x10{{$}}
+; SDAG-NEXT:        .vgpr_count:     0x2{{$}}
+; GISEL-NEXT:        .vgpr_count:     0x3{{$}}
 ; GCN-NEXT:      dynamic_stack_loop:
+; GCN-NEXT:        .lds_size:       0{{$}}
+; SDAG-NEXT:        .sgpr_count:     0x22{{$}}
+; GISEL-NEXT:        .sgpr_count:     0x24{{$}}
 ; GCN-NEXT:        .stack_frame_size_in_bytes: 0x10{{$}}
+; SDAG-NEXT:        .vgpr_count:     0x3{{$}}
+; GISEL-NEXT:        .vgpr_count:     0x4{{$}}
 ; GCN-NEXT:      multiple_stack:
+; GCN-NEXT:        .lds_size:       0{{$}}
+; GCN-NEXT:        .sgpr_count:     0x21{{$}}
 ; GCN-NEXT:        .stack_frame_size_in_bytes: 0x24{{$}}
+; GCN-NEXT:        .vgpr_count:     0x3{{$}}
 ; GCN-NEXT:      no_stack:
+; GCN-NEXT:        .lds_size:       0{{$}}
+; GCN-NEXT:        .sgpr_count:     0x20{{$}}
 ; GCN-NEXT:        .stack_frame_size_in_bytes: 0{{$}}
+; GCN-NEXT:        .vgpr_count:     0x1{{$}}
 ; GCN-NEXT:      no_stack_call:
+; GCN-NEXT:        .lds_size:       0{{$}}
+; GCN-NEXT:        .sgpr_count:     0x20{{$}}
 ; GCN-NEXT:        .stack_frame_size_in_bytes: 0{{$}}
+; GCN-NEXT:        .vgpr_count:     0x1{{$}}
 ; GCN-NEXT:      no_stack_extern_call:
+; GCN-NEXT:        .lds_size:       0{{$}}
+; GFX8-NEXT:        .sgpr_count:     0x68{{$}}
+; GFX9-NEXT:        .sgpr_count:     0x66{{$}}
 ; GCN-NEXT:        .stack_frame_size_in_bytes: 0x10{{$}}
+; GCN-NEXT:        .vgpr_count:     0x40{{$}}
 ; GCN-NEXT:      no_stack_extern_call_many_args:
-; SDAG-NEXT:        .stack_frame_size_in_bytes: 0x90{{$}}
-; GISEL-NEXT:        .stack_frame_size_in_bytes: 0x90{{$}}
+; GCN-NEXT:        .lds_size:       0{{$}}
+; GFX8-NEXT:        .sgpr_count:     0x68{{$}}
+; GFX9-NEXT:        .sgpr_count:     0x66{{$}}
+; GCN-NEXT:        .stack_frame_size_in_bytes: 0x90{{$}}
+; GCN-NEXT:        .vgpr_count:     0x40{{$}}
 ; GCN-NEXT:      no_stack_indirect_call:
+; GCN-NEXT:        .lds_size:       0{{$}}
+; GFX8-NEXT:        .sgpr_count:     0x68{{$}}
+; GFX9-NEXT:        .sgpr_count:     0x66{{$}}
 ; GCN-NEXT:        .stack_frame_size_in_bytes: 0x10{{$}}
+; GCN-NEXT:        .vgpr_count:     0x40{{$}}
 ; GCN-NEXT:      simple_lds:
+; GCN-NEXT:        .lds_size:       0x100{{$}}
+; GCN-NEXT:        .sgpr_count:     0x20{{$}}
 ; GCN-NEXT:        .stack_frame_size_in_bytes: 0{{$}}
+; GCN-NEXT:        .vgpr_count:     0x1{{$}}
 ; GCN-NEXT:      simple_lds_recurse:
+; GCN-NEXT:        .lds_size:       0x100{{$}}
+; GFX8-NEXT:        .sgpr_count:     0x68{{$}}
+; GFX9-NEXT:        .sgpr_count:     0x66{{$}}
 ; GCN-NEXT:        .stack_frame_size_in_bytes: 0x10{{$}}
+; GCN-NEXT:        .vgpr_count:     0x40{{$}}
 ; GCN-NEXT:      simple_stack:
+; GCN-NEXT:        .lds_size:       0{{$}}
+; GCN-NEXT:        .sgpr_count:     0x21{{$}}
 ; GCN-NEXT:        .stack_frame_size_in_bytes: 0x14{{$}}
+; GCN-NEXT:        .vgpr_count:     0x2{{$}}
 ; GCN-NEXT:      simple_stack_call:
+; GCN-NEXT:        .lds_size:       0{{$}}
+; GCN-NEXT:        .sgpr_count:     0x22{{$}}
 ; GCN-NEXT:        .stack_frame_size_in_bytes: 0x20{{$}}
+; GCN-NEXT:        .vgpr_count:     0x3{{$}}
 ; GCN-NEXT:      simple_stack_extern_call:
+; GCN-NEXT:        .lds_size:       0{{$}}
+; GFX8-NEXT:        .sgpr_count:     0x68{{$}}
+; GFX9-NEXT:        .sgpr_count:     0x66{{$}}
 ; GCN-NEXT:        .stack_frame_size_in_bytes: 0x20{{$}}
+; GCN-NEXT:        .vgpr_count:     0x40{{$}}
 ; GCN-NEXT:      simple_stack_indirect_call:
+; GCN-NEXT:        .lds_size:       0{{$}}
+; GFX8-NEXT:        .sgpr_count:     0x68{{$}}
+; GFX9-NEXT:        .sgpr_count:     0x66{{$}}
 ; GCN-NEXT:        .stack_frame_size_in_bytes: 0x20{{$}}
+; GCN-NEXT:        .vgpr_count:     0x40{{$}}
 ; GCN-NEXT:      simple_stack_recurse:
+; GCN-NEXT:        .lds_size:       0{{$}}
+; GFX8-NEXT:        .sgpr_count:     0x68{{$}}
+; GFX9-NEXT:        .sgpr_count:     0x66{{$}}
 ; GCN-NEXT:        .stack_frame_size_in_bytes: 0x20{{$}}
+; GCN-NEXT:        .vgpr_count:     0x40{{$}}
 ; GCN-NEXT: ...