[clang] 7a4968b - [AMDGPU] Add dynamic stack bit info to kernel-resource-usage Rpass output

Corbin Robeck via cfe-commits cfe-commits at lists.llvm.org
Tue Jul 25 12:22:10 PDT 2023


Author: Corbin Robeck
Date: 2023-07-25T12:20:13-07:00
New Revision: 7a4968b5a378d1f06e638c99d0e983c35045fb34

URL: https://github.com/llvm/llvm-project/commit/7a4968b5a378d1f06e638c99d0e983c35045fb34
DIFF: https://github.com/llvm/llvm-project/commit/7a4968b5a378d1f06e638c99d0e983c35045fb34.diff

LOG: [AMDGPU] Add dynamic stack bit info to kernel-resource-usage Rpass output

In code object v5 (https://llvm.org/docs/AMDGPUUsage.html#code-object-v5-metadata) the AMDGPU backend added the .uses_dynamic_stack bit to the kernel metadata to identify kernels whose stack usage cannot be determined at compile time (mainly because of indirect function calls and recursion). This patch adds this information to the output of the kernel-resource-usage remarks.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D156040

Author:    Corbin Robeck <corbin.robeck at amd.com>

Added: 
    

Modified: 
    clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
    llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
    llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll

Removed: 
    


################################################################################
diff  --git a/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl b/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
index 9403d12afa05a7..a05e21b37b9127 100644
--- a/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
+++ b/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
@@ -1,11 +1,12 @@
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx908 -Rpass-analysis=kernel-resource-usage -S -O0 -verify %s -o /dev/null
 
-// expected-remark@+9 {{Function Name: foo}}
-// expected-remark@+8 {{    SGPRs: 13}}
-// expected-remark@+7 {{    VGPRs: 10}}
-// expected-remark@+6 {{    AGPRs: 12}}
-// expected-remark@+5 {{    ScratchSize [bytes/lane]: 0}}
+// expected-remark@+10 {{Function Name: foo}}
+// expected-remark@+9 {{    SGPRs: 13}}
+// expected-remark@+8 {{    VGPRs: 10}}
+// expected-remark@+7 {{    AGPRs: 12}}
+// expected-remark@+6 {{    ScratchSize [bytes/lane]: 0}}
+// expected-remark@+5 {{    Dynamic Stack: False}}
 // expected-remark@+4 {{    Occupancy [waves/SIMD]: 10}}
 // expected-remark@+3 {{    SGPRs Spill: 0}}
 // expected-remark@+2 {{    VGPRs Spill: 0}}

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 7cd8e53e65215f..4b9c699879e349 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -1293,6 +1293,9 @@ void AMDGPUAsmPrinter::emitResourceUsageRemarks(
     EmitResourceUsageRemark("NumAGPR", "AGPRs", CurrentProgramInfo.NumAccVGPR);
   EmitResourceUsageRemark("ScratchSize", "ScratchSize [bytes/lane]",
                           CurrentProgramInfo.ScratchSize);
+  StringRef DynamicStackStr =
+      CurrentProgramInfo.DynamicCallStack ? "True" : "False";
+  EmitResourceUsageRemark("DynamicStack", "Dynamic Stack", DynamicStackStr);
   EmitResourceUsageRemark("Occupancy", "Occupancy [waves/SIMD]",
                           CurrentProgramInfo.Occupancy);
   EmitResourceUsageRemark("SGPRSpill", "SGPRs Spill",

diff  --git a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
index 2616b043324191..7252aa6120cab4 100644
--- a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
+++ b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=STDERR %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=null %s 2>&1 | FileCheck -check-prefix=STDERR %s
 ; RUN: FileCheck -check-prefix=REMARK %s < %t
 
 ; STDERR: remark: foo.cl:27:0: Function Name: test_kernel
@@ -6,6 +6,7 @@
 ; STDERR-NEXT: remark: foo.cl:27:0:     VGPRs: 9
 ; STDERR-NEXT: remark: foo.cl:27:0:     AGPRs: 43
 ; STDERR-NEXT: remark: foo.cl:27:0:     ScratchSize [bytes/lane]: 0
+; STDERR-NEXT: remark: foo.cl:27:0:     Dynamic Stack: False
 ; STDERR-NEXT: remark: foo.cl:27:0:     Occupancy [waves/SIMD]: 5
 ; STDERR-NEXT: remark: foo.cl:27:0:     SGPRs Spill: 0
 ; STDERR-NEXT: remark: foo.cl:27:0:     VGPRs Spill: 0
@@ -55,7 +56,16 @@
 ; REMARK-NEXT: Args:
 ; REMARK-NEXT:   - String:          '    ScratchSize [bytes/lane]: '
 ; REMARK-NEXT:   - ScratchSize:     '0'
-; REMARK-NEXT: ...
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Analysis
+; REMARK-NEXT: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            DynamicStack
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          '    Dynamic Stack: '
+; REMARK-NEXT:   - DynamicStack:    'False'
+; REMARK-NEXT: ...
 ; REMARK-NEXT: --- !Analysis
 ; REMARK-NEXT: Pass:            kernel-resource-usage
 ; REMARK-NEXT: Name:            Occupancy
@@ -108,6 +118,7 @@ define amdgpu_kernel void @test_kernel() !dbg !3 {
 ; STDERR-NEXT: remark: foo.cl:42:0:     VGPRs: 0
 ; STDERR-NEXT: remark: foo.cl:42:0:     AGPRs: 0
 ; STDERR-NEXT: remark: foo.cl:42:0:     ScratchSize [bytes/lane]: 0
+; STDERR-NEXT: remark: foo.cl:42:0:     Dynamic Stack: False
 ; STDERR-NEXT: remark: foo.cl:42:0:     Occupancy [waves/SIMD]: 0
 ; STDERR-NEXT: remark: foo.cl:42:0:     SGPRs Spill: 0
 ; STDERR-NEXT: remark: foo.cl:42:0:     VGPRs Spill: 0
@@ -124,6 +135,7 @@ define void @test_func() !dbg !6 {
 ; STDERR-NEXT: remark: foo.cl:8:0:     VGPRs: 0
 ; STDERR-NEXT: remark: foo.cl:8:0:     AGPRs: 0
 ; STDERR-NEXT: remark: foo.cl:8:0:     ScratchSize [bytes/lane]: 0
+; STDERR-NEXT: remark: foo.cl:8:0:     Dynamic Stack: False
 ; STDERR-NEXT: remark: foo.cl:8:0:     Occupancy [waves/SIMD]: 8
 ; STDERR-NEXT: remark: foo.cl:8:0:     SGPRs Spill: 0
 ; STDERR-NEXT: remark: foo.cl:8:0:     VGPRs Spill: 0
@@ -137,6 +149,7 @@ define amdgpu_kernel void @empty_kernel() !dbg !7 {
 ; STDERR-NEXT: remark: foo.cl:52:0:     VGPRs: 0
 ; STDERR-NEXT: remark: foo.cl:52:0:     AGPRs: 0
 ; STDERR-NEXT: remark: foo.cl:52:0:     ScratchSize [bytes/lane]: 0
+; STDERR-NEXT: remark: foo.cl:52:0:     Dynamic Stack: False
 ; STDERR-NEXT: remark: foo.cl:52:0:     Occupancy [waves/SIMD]: 0
 ; STDERR-NEXT: remark: foo.cl:52:0:     SGPRs Spill: 0
 ; STDERR-NEXT: remark: foo.cl:52:0:     VGPRs Spill: 0
@@ -144,8 +157,48 @@ define void @empty_func() !dbg !8 {
   ret void
 }
 
+; STDERR: remark: foo.cl:64:0: Function Name: test_indirect_call
+; STDERR-NEXT: remark: foo.cl:64:0:     SGPRs: 39
+; STDERR-NEXT: remark: foo.cl:64:0:     VGPRs: 32
+; STDERR-NEXT: remark: foo.cl:64:0:     AGPRs: 10
+; STDERR-NEXT: remark: foo.cl:64:0:     ScratchSize [bytes/lane]: 0
+; STDERR-NEXT: remark: foo.cl:64:0:     Dynamic Stack: True
+; STDERR-NEXT: remark: foo.cl:64:0:     Occupancy [waves/SIMD]: 8
+; STDERR-NEXT: remark: foo.cl:64:0:     SGPRs Spill: 0
+; STDERR-NEXT: remark: foo.cl:64:0:     VGPRs Spill: 0
+; STDERR-NEXT: remark: foo.cl:64:0:     LDS Size [bytes/block]: 0
+@gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4
+
+define amdgpu_kernel void @test_indirect_call() !dbg !9 {
+  %fptr = load ptr, ptr addrspace(4) @gv.fptr0
+  call void %fptr()
+  ret void
+}
+
+; STDERR: remark: foo.cl:74:0: Function Name: test_indirect_w_static_stack
+; STDERR-NEXT: remark: foo.cl:74:0:     SGPRs: 39
+; STDERR-NEXT: remark: foo.cl:74:0:     VGPRs: 32
+; STDERR-NEXT: remark: foo.cl:74:0:     AGPRs: 10
+; STDERR-NEXT: remark: foo.cl:74:0:     ScratchSize [bytes/lane]: 64
+; STDERR-NEXT: remark: foo.cl:74:0:     Dynamic Stack: True
+; STDERR-NEXT: remark: foo.cl:74:0:     Occupancy [waves/SIMD]: 8
+; STDERR-NEXT: remark: foo.cl:74:0:     SGPRs Spill: 0
+; STDERR-NEXT: remark: foo.cl:74:0:     VGPRs Spill: 0
+; STDERR-NEXT: remark: foo.cl:74:0:     LDS Size [bytes/block]: 0
+
+declare void @llvm.memset.p5.i64(ptr addrspace(5) nocapture readonly, i8, i64, i1 immarg)
+ 
+define amdgpu_kernel void @test_indirect_w_static_stack() !dbg !10 {
+  %alloca = alloca <10 x i64>, align 16, addrspace(5)
+  call void @llvm.memset.p5.i64(ptr addrspace(5) %alloca, i8 0, i64 40, i1 false)
+  %fptr = load ptr, ptr addrspace(4) @gv.fptr0
+  call void %fptr()
+  ret void
+}
+
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!2}
+!llvm.module.flags = !{!11}
 
 !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
 !1 = !DIFile(filename: "foo.cl", directory: "/tmp")
@@ -156,3 +209,6 @@ define void @empty_func() !dbg !8 {
 !6 = distinct !DISubprogram(name: "test_func", scope: !1, file: !1, type: !4, scopeLine: 42, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
 !7 = distinct !DISubprogram(name: "empty_kernel", scope: !1, file: !1, type: !4, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
 !8 = distinct !DISubprogram(name: "empty_func", scope: !1, file: !1, type: !4, scopeLine: 52, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
+!9 = distinct !DISubprogram(name: "test_indirect_call", scope: !1, file: !1, type: !4, scopeLine: 64, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
+!10 = distinct !DISubprogram(name: "test_indirect_w_static_stack", scope: !1, file: !1, type: !4, scopeLine: 74, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
+!11 = !{i32 1, !"amdgpu_code_object_version", i32 500}


        


More information about the cfe-commits mailing list