[llvm] edd6756 - [AMDGPU] Emit stack frame size in metadata
Sebastian Neubauer via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 25 07:48:44 PST 2020
Author: Sebastian Neubauer
Date: 2020-11-25T16:30:02+01:00
New Revision: edd675643d5ff49e6ea01af2a2a9b40498b3226c
URL: https://github.com/llvm/llvm-project/commit/edd675643d5ff49e6ea01af2a2a9b40498b3226c
DIFF: https://github.com/llvm/llvm-project/commit/edd675643d5ff49e6ea01af2a2a9b40498b3226c.diff
LOG: [AMDGPU] Emit stack frame size in metadata
Add .shader_functions to pal metadata, which contains the stack frame
size for all non-entry-point functions.
Differential Revision: https://reviews.llvm.org/D90036
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index fc785902843c..8148d0487802 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -456,9 +456,12 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
Info = analyzeResourceUsage(MF);
}
- if (STM.isAmdPalOS() && MFI->isEntryFunction())
- EmitPALMetadata(MF, CurrentProgramInfo);
- else if (!STM.isAmdHsaOS()) {
+ if (STM.isAmdPalOS()) {
+ if (MFI->isEntryFunction())
+ EmitPALMetadata(MF, CurrentProgramInfo);
+ else
+ emitPALFunctionMetadata(MF);
+ } else if (!STM.isAmdHsaOS()) {
EmitProgramInfoSI(MF, CurrentProgramInfo);
}
@@ -1260,6 +1263,12 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
MD->setWave32(MF.getFunction().getCallingConv());
}
+void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
+ auto *MD = getTargetStreamer()->getPALMetadata();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ MD->setStackFrameSize(MF, MFI.getStackSize());
+}
+
// This is supposed to be log2(Size)
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
switch (Size) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 54e8338ab4b0..907ff2bfc162 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -78,6 +78,7 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
const SIProgramInfo &KernelInfo);
void EmitPALMetadata(const MachineFunction &MF,
const SIProgramInfo &KernelInfo);
+ void emitPALFunctionMetadata(const MachineFunction &MF);
void emitCommonFunctionComments(uint32_t NumVGPR,
Optional<uint32_t> NumAGPR,
uint32_t TotalNumVGPR,
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
index 85cba165770f..efabab90422f 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
@@ -238,6 +238,14 @@ void AMDGPUPALMetadata::setScratchSize(CallingConv::ID CC, unsigned Val) {
getHwStage(CC)[".scratch_memory_size"] = MsgPackDoc.getNode(Val);
}
+// Set the stack frame size of a function in the metadata.
+void AMDGPUPALMetadata::setStackFrameSize(const MachineFunction &MF,
+ unsigned Val) {
+ auto Node = MsgPackDoc.getMapNode();
+ Node[".stack_frame_size_in_bytes"] = MsgPackDoc.getNode(Val);
+ getShaderFunctions()[MF.getFunction().getName()] = Node;
+}
+
// Set the hardware register bit in PAL metadata to enable wave32 on the
// shader of the given calling convention.
void AMDGPUPALMetadata::setWave32(unsigned CC) {
@@ -721,6 +729,24 @@ msgpack::MapDocNode AMDGPUPALMetadata::getRegisters() {
return Registers.getMap();
}
+// Reference (create if necessary) the node for the shader functions map.
+msgpack::DocNode &AMDGPUPALMetadata::refShaderFunctions() {
+ auto &N =
+ MsgPackDoc.getRoot()
+ .getMap(/*Convert=*/true)[MsgPackDoc.getNode("amdpal.pipelines")]
+ .getArray(/*Convert=*/true)[0]
+ .getMap(/*Convert=*/true)[MsgPackDoc.getNode(".shader_functions")];
+ N.getMap(/*Convert=*/true);
+ return N;
+}
+
+// Get (create if necessary) the shader functions map.
+msgpack::MapDocNode AMDGPUPALMetadata::getShaderFunctions() {
+ if (ShaderFunctions.isEmpty())
+ ShaderFunctions = refShaderFunctions();
+ return ShaderFunctions.getMap();
+}
+
// Return the PAL metadata hardware shader stage name.
static const char *getStageName(CallingConv::ID CC) {
switch (CC) {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
index b089f295364c..3b1767bb1f64 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
@@ -15,6 +15,7 @@
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H
#include "llvm/BinaryFormat/MsgPackDocument.h"
+#include "llvm/CodeGen/MachineFunction.h"
namespace llvm {
@@ -26,6 +27,7 @@ class AMDGPUPALMetadata {
msgpack::Document MsgPackDoc;
msgpack::DocNode Registers;
msgpack::DocNode HwStages;
+ msgpack::DocNode ShaderFunctions;
public:
// Read the amdgpu.pal.metadata supplied by the frontend, ready for
@@ -76,6 +78,9 @@ class AMDGPUPALMetadata {
// Set the scratch size in the metadata.
void setScratchSize(unsigned CC, unsigned Val);
+ // Set the stack frame size of a function in the metadata.
+ void setStackFrameSize(const MachineFunction &MF, unsigned Val);
+
// Set the hardware register bit in PAL metadata to enable wave32 on the
// shader of the given calling convention.
void setWave32(unsigned CC);
@@ -119,6 +124,12 @@ class AMDGPUPALMetadata {
// Get (create if necessary) the registers map.
msgpack::MapDocNode getRegisters();
+ // Reference (create if necessary) the node for the shader functions map.
+ msgpack::DocNode &refShaderFunctions();
+
+ // Get (create if necessary) the shader functions map.
+ msgpack::MapDocNode getShaderFunctions();
+
// Get (create if necessary) the .hardware_stages entry for the given calling
// convention.
msgpack::MapDocNode getHwStage(unsigned CC);
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
index f50a07d9afef..c6a065e1e65f 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
@@ -1,16 +1,161 @@
-; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -enable-var-scope %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -enable-var-scope %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s
-; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s
-
-; GCN-LABEL: {{^}}gfx_callable_amdpal:
-; GCN: .amdgpu_pal_metadata
-; GCN-NEXT: ---
-; GCN-NEXT: amdpal.pipelines:
+; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
+; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL -enable-var-scope %s
+
+declare float @extern_func(float) #0
+declare float @extern_func_many_args(<64 x float>) #0
+
+@funcptr = external hidden unnamed_addr addrspace(4) constant void()*, align 4
+
+define amdgpu_gfx float @no_stack(float %arg0) #0 {
+ %add = fadd float %arg0, 1.0
+ ret float %add
+}
+
+define amdgpu_gfx float @simple_stack(float %arg0) #0 {
+ %stack = alloca float, i32 4, align 4, addrspace(5)
+ store volatile float 2.0, float addrspace(5)* %stack
+ %val = load volatile float, float addrspace(5)* %stack
+ %add = fadd float %arg0, %val
+ ret float %add
+}
+
+define amdgpu_gfx float @multiple_stack(float %arg0) #0 {
+ %stack = alloca float, i32 4, align 4, addrspace(5)
+ store volatile float 2.0, float addrspace(5)* %stack
+ %val = load volatile float, float addrspace(5)* %stack
+ %add = fadd float %arg0, %val
+ %stack2 = alloca float, i32 4, align 4, addrspace(5)
+ store volatile float 2.0, float addrspace(5)* %stack2
+ %val2 = load volatile float, float addrspace(5)* %stack2
+ %add2 = fadd float %add, %val2
+ ret float %add2
+}
+
+define amdgpu_gfx float @dynamic_stack(float %arg0) #0 {
+bb0:
+ %cmp = fcmp ogt float %arg0, 0.0
+ br i1 %cmp, label %bb1, label %bb2
+
+bb1:
+ %stack = alloca float, i32 4, align 4, addrspace(5)
+ store volatile float 2.0, float addrspace(5)* %stack
+ %val = load volatile float, float addrspace(5)* %stack
+ %add = fadd float %arg0, %val
+ br label %bb2
+
+bb2:
+ %res = phi float [ 0.0, %bb0 ], [ %add, %bb1 ]
+ ret float %res
+}
+
+define amdgpu_gfx float @dynamic_stack_loop(float %arg0) #0 {
+bb0:
+ br label %bb1
+
+bb1:
+ %ctr = phi i32 [ 0, %bb0 ], [ %newctr, %bb1 ]
+ %stack = alloca float, i32 4, align 4, addrspace(5)
+ store volatile float 2.0, float addrspace(5)* %stack
+ %val = load volatile float, float addrspace(5)* %stack
+ %add = fadd float %arg0, %val
+ %cmp = icmp sgt i32 %ctr, 0
+ %newctr = sub i32 %ctr, 1
+ br i1 %cmp, label %bb1, label %bb2
+
+bb2:
+ ret float %add
+}
+
+define amdgpu_gfx float @no_stack_call(float %arg0) #0 {
+ %res = call amdgpu_gfx float @simple_stack(float %arg0)
+ ret float %res
+}
+
+define amdgpu_gfx float @simple_stack_call(float %arg0) #0 {
+ %stack = alloca float, i32 4, align 4, addrspace(5)
+ store volatile float 2.0, float addrspace(5)* %stack
+ %val = load volatile float, float addrspace(5)* %stack
+ %res = call amdgpu_gfx float @simple_stack(float %arg0)
+ %add = fadd float %res, %val
+ ret float %add
+}
+
+define amdgpu_gfx float @no_stack_extern_call(float %arg0) #0 {
+ %res = call amdgpu_gfx float @extern_func(float %arg0)
+ ret float %res
+}
+
+define amdgpu_gfx float @simple_stack_extern_call(float %arg0) #0 {
+ %stack = alloca float, i32 4, align 4, addrspace(5)
+ store volatile float 2.0, float addrspace(5)* %stack
+ %val = load volatile float, float addrspace(5)* %stack
+ %res = call amdgpu_gfx float @extern_func(float %arg0)
+ %add = fadd float %res, %val
+ ret float %add
+}
+
+define amdgpu_gfx float @no_stack_extern_call_many_args(<64 x float> %arg0) #0 {
+ %res = call amdgpu_gfx float @extern_func_many_args(<64 x float> %arg0)
+ ret float %res
+}
+
+define amdgpu_gfx float @no_stack_indirect_call(float %arg0) #0 {
+ %fptr = load void()*, void()* addrspace(4)* @funcptr
+ call amdgpu_gfx void %fptr()
+ ret float %arg0
+}
+
+define amdgpu_gfx float @simple_stack_indirect_call(float %arg0) #0 {
+ %stack = alloca float, i32 4, align 4, addrspace(5)
+ store volatile float 2.0, float addrspace(5)* %stack
+ %val = load volatile float, float addrspace(5)* %stack
+ %fptr = load void()*, void()* addrspace(4)* @funcptr
+ call amdgpu_gfx void %fptr()
+ %add = fadd float %arg0, %val
+ ret float %add
+}
+
+define amdgpu_gfx float @simple_stack_recurse(float %arg0) #0 {
+ %stack = alloca float, i32 4, align 4, addrspace(5)
+ store volatile float 2.0, float addrspace(5)* %stack
+ %val = load volatile float, float addrspace(5)* %stack
+ %res = call amdgpu_gfx float @simple_stack_recurse(float %arg0)
+ %add = fadd float %res, %val
+ ret float %add
+}
+
+attributes #0 = { nounwind }
+
+; GCN: amdpal.pipelines:
; GCN-NEXT: - .registers: {}
+; GCN-NEXT: .shader_functions:
+; GCN-NEXT: dynamic_stack:
+; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
+; GCN-NEXT: dynamic_stack_loop:
+; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
+; GCN-NEXT: multiple_stack:
+; GCN-NEXT: .stack_frame_size_in_bytes: 0x24{{$}}
+; GCN-NEXT: no_stack:
+; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
+; GCN-NEXT: no_stack_call:
+; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
+; GCN-NEXT: no_stack_extern_call:
+; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
+; GCN-NEXT: no_stack_extern_call_many_args:
+; SDAG-NEXT: .stack_frame_size_in_bytes: 0x90{{$}}
+; GISEL-NEXT: .stack_frame_size_in_bytes: 0xd0{{$}}
+; GCN-NEXT: no_stack_indirect_call:
+; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
+; GCN-NEXT: simple_stack:
+; GCN-NEXT: .stack_frame_size_in_bytes: 0x14{{$}}
+; GCN-NEXT: simple_stack_call:
+; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
+; GCN-NEXT: simple_stack_extern_call:
+; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
+; GCN-NEXT: simple_stack_indirect_call:
+; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
+; GCN-NEXT: simple_stack_recurse:
+; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
; GCN-NEXT: ...
-; GCN-NEXT: .end_amdgpu_pal_metadata
-define amdgpu_gfx half @gfx_callable_amdpal(half %arg0) {
- %add = fadd half %arg0, 1.0
- ret half %add
-}
More information about the llvm-commits
mailing list