[llvm] 6735773 - [AMDGPU] Add remarks to output some resource usage

Vang Thao via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 15 11:02:29 PDT 2022


Author: Vang Thao
Date: 2022-07-15T11:01:53-07:00
New Revision: 67357739c6d36a61972c1fc0e829e35cb5375279

URL: https://github.com/llvm/llvm-project/commit/67357739c6d36a61972c1fc0e829e35cb5375279
DIFF: https://github.com/llvm/llvm-project/commit/67357739c6d36a61972c1fc0e829e35cb5375279.diff

LOG: [AMDGPU] Add remarks to output some resource usage

Add analyis remarks to output kernel name, register usage, occupancy,
scratch usage, spills, and LDS information.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D123878

Added: 
    clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
    llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
    llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
    llvm/lib/Target/AMDGPU/SIProgramInfo.h

Removed: 
    


################################################################################
diff  --git a/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl b/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
new file mode 100644
index 0000000000000..cf0c15b6319f1
--- /dev/null
+++ b/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
@@ -0,0 +1,17 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx908 -Rpass-analysis=kernel-resource-usage -S -O0 -verify %s -o /dev/null
+
+// expected-remark at +9 {{Function Name: foo}}
+// expected-remark at +8 {{    SGPRs: 9}}
+// expected-remark at +7 {{    VGPRs: 10}}
+// expected-remark at +6 {{    AGPRs: 12}}
+// expected-remark at +5 {{    ScratchSize [bytes/lane]: 0}}
+// expected-remark at +4 {{    Occupancy [waves/SIMD]: 10}}
+// expected-remark at +3 {{    SGPRs Spill: 0}}
+// expected-remark at +2 {{    VGPRs Spill: 0}}
+// expected-remark at +1 {{    LDS Size [bytes/block]: 0}}
+__kernel void foo() {
+  __asm volatile ("; clobber s8" :::"s8");
+  __asm volatile ("; clobber v9" :::"v9");
+  __asm volatile ("; clobber a11" :::"a11");
+}

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 57a4660bc1ebc..f1cc40b3a69af 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -27,8 +27,10 @@
 #include "SIMachineFunctionInfo.h"
 #include "TargetInfo/AMDGPUTargetInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
@@ -506,6 +508,9 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
 
   emitFunctionBody();
 
+  emitResourceUsageRemarks(MF, CurrentProgramInfo, MFI->isModuleEntryFunction(),
+                           STM.hasMAIInsts());
+
   if (isVerbose()) {
     MCSectionELF *CommentSection =
         Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
@@ -875,6 +880,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
     LDSAlignShift = 9;
   }
 
+  ProgInfo.SGPRSpill = MFI->getNumSpilledSGPRs();
+  ProgInfo.VGPRSpill = MFI->getNumSpilledVGPRs();
+
   ProgInfo.LDSSize = MFI->getLDSSize();
   ProgInfo.LDSBlocks =
       alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
@@ -1180,3 +1188,58 @@ void AMDGPUAsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addPreserved<AMDGPUResourceUsageAnalysis>();
   AsmPrinter::getAnalysisUsage(AU);
 }
+
+void AMDGPUAsmPrinter::emitResourceUsageRemarks(
+    const MachineFunction &MF, const SIProgramInfo &CurrentProgramInfo,
+    bool isModuleEntryFunction, bool hasMAIInsts) {
+  if (!ORE)
+    return;
+
+  const char *Name = "kernel-resource-usage";
+  const char *Indent = "    ";
+
+  // If the remark is not specifically enabled, do not output to yaml
+  LLVMContext &Ctx = MF.getFunction().getContext();
+  if (!Ctx.getDiagHandlerPtr()->isAnalysisRemarkEnabled(Name))
+    return;
+
+  auto EmitResourceUsageRemark = [&](StringRef RemarkName,
+                                     StringRef RemarkLabel, auto Argument) {
+    // Add an indent for every line besides the line with the kernel name. This
+    // makes it easier to tell which resource usage go with which kernel since
+    // the kernel name will always be displayed first.
+    std::string LabelStr = RemarkLabel.str() + ": ";
+    if (!RemarkName.equals("FunctionName"))
+      LabelStr = Indent + LabelStr;
+
+    ORE->emit([&]() {
+      return MachineOptimizationRemarkAnalysis(Name, RemarkName,
+                                               MF.getFunction().getSubprogram(),
+                                               &MF.front())
+             << LabelStr << ore::NV(RemarkName, Argument);
+    });
+  };
+
+  // FIXME: Formatting here is pretty nasty because clang does not accept
+  // newlines from diagnostics. This forces us to emit multiple diagnostic
+  // remarks to simulate newlines. If and when clang does accept newlines, this
+  // formatting should be aggregated into one remark with newlines to avoid
+  // printing multiple diagnostic location and diag opts.
+  EmitResourceUsageRemark("FunctionName", "Function Name",
+                          MF.getFunction().getName());
+  EmitResourceUsageRemark("NumSGPR", "SGPRs", CurrentProgramInfo.NumSGPR);
+  EmitResourceUsageRemark("NumVGPR", "VGPRs", CurrentProgramInfo.NumArchVGPR);
+  if (hasMAIInsts)
+    EmitResourceUsageRemark("NumAGPR", "AGPRs", CurrentProgramInfo.NumAccVGPR);
+  EmitResourceUsageRemark("ScratchSize", "ScratchSize [bytes/lane]",
+                          CurrentProgramInfo.ScratchSize);
+  EmitResourceUsageRemark("Occupancy", "Occupancy [waves/SIMD]",
+                          CurrentProgramInfo.Occupancy);
+  EmitResourceUsageRemark("SGPRSpill", "SGPRs Spill",
+                          CurrentProgramInfo.SGPRSpill);
+  EmitResourceUsageRemark("VGPRSpill", "VGPRs Spill",
+                          CurrentProgramInfo.VGPRSpill);
+  if (isModuleEntryFunction)
+    EmitResourceUsageRemark("BytesLDS", "LDS Size [bytes/block]",
+                            CurrentProgramInfo.LDSSize);
+}

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index ddda2cf107b11..2881b8d7bccaa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -69,6 +69,9 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
                                   uint64_t ScratchSize,
                                   uint64_t CodeSize,
                                   const AMDGPUMachineFunction* MFI);
+  void emitResourceUsageRemarks(const MachineFunction &MF,
+                                const SIProgramInfo &CurrentProgramInfo,
+                                bool isModuleEntryFunction, bool hasMAIInsts);
 
   uint16_t getAmdhsaKernelCodeProperties(
       const MachineFunction &MF) const;

diff  --git a/llvm/lib/Target/AMDGPU/SIProgramInfo.h b/llvm/lib/Target/AMDGPU/SIProgramInfo.h
index b13afceba20e7..553fb4cf496c1 100644
--- a/llvm/lib/Target/AMDGPU/SIProgramInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIProgramInfo.h
@@ -49,6 +49,8 @@ struct SIProgramInfo {
     uint32_t AccumOffset = 0;
     uint32_t TgSplit = 0;
     uint32_t NumSGPR = 0;
+    unsigned SGPRSpill = 0;
+    unsigned VGPRSpill = 0;
     uint32_t LDSSize = 0;
     bool FlatUsed = false;
 

diff  --git a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
new file mode 100644
index 0000000000000..607dc0623236e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
@@ -0,0 +1,158 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=STDERR %s
+; RUN: FileCheck -check-prefix=REMARK %s < %t
+
+; STDERR: remark: foo.cl:27:0: Function Name: test_kernel
+; STDERR-NEXT: remark: foo.cl:27:0:     SGPRs: 24
+; STDERR-NEXT: remark: foo.cl:27:0:     VGPRs: 9
+; STDERR-NEXT: remark: foo.cl:27:0:     AGPRs: 43
+; STDERR-NEXT: remark: foo.cl:27:0:     ScratchSize [bytes/lane]: 0
+; STDERR-NEXT: remark: foo.cl:27:0:     Occupancy [waves/SIMD]: 5
+; STDERR-NEXT: remark: foo.cl:27:0:     SGPRs Spill: 0
+; STDERR-NEXT: remark: foo.cl:27:0:     VGPRs Spill: 0
+; STDERR-NEXT: remark: foo.cl:27:0:     LDS Size [bytes/block]: 512
+
+; REMARK-LABEL: --- !Analysis
+; REMARK: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            FunctionName
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          'Function Name: '
+; REMARK-NEXT:   - FunctionName:      test_kernel
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Analysis
+; REMARK-NEXT: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            NumSGPR
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          '    SGPRs: '
+; REMARK-NEXT:   - NumSGPR:         '24'
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Analysis
+; REMARK-NEXT: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            NumVGPR
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          '    VGPRs: '
+; REMARK-NEXT:   - NumVGPR:         '9'
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Analysis
+; REMARK-NEXT: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            NumAGPR
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          '    AGPRs: '
+; REMARK-NEXT:   - NumAGPR:         '43'
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Analysis
+; REMARK-NEXT: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            ScratchSize
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          '    ScratchSize [bytes/lane]: '
+; REMARK-NEXT:   - ScratchSize:     '0'
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Analysis
+; REMARK-NEXT: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            Occupancy
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          '    Occupancy [waves/SIMD]: '
+; REMARK-NEXT:   - Occupancy:       '5'
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Analysis
+; REMARK-NEXT: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            SGPRSpill
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          '    SGPRs Spill: '
+; REMARK-NEXT:   - SGPRSpill:       '0'
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Analysis
+; REMARK-NEXT: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            VGPRSpill
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          '    VGPRs Spill: '
+; REMARK-NEXT:   - VGPRSpill:       '0'
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Analysis
+; REMARK-NEXT: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            BytesLDS
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          '    LDS Size [bytes/block]: '
+; REMARK-NEXT:   - BytesLDS:        '512'
+; REMARK-NEXT: ...
+
+ at lds = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4
+
+define amdgpu_kernel void @test_kernel() !dbg !3 {
+  call void asm sideeffect "; clobber v8", "~{v8}"()
+  call void asm sideeffect "; clobber s23", "~{s23}"()
+  call void asm sideeffect "; clobber a42", "~{a42}"()
+  call void asm sideeffect "; use $0", "v"([128 x i32] addrspace(3)* @lds)
+  ret void
+}
+
+; STDERR: remark: foo.cl:42:0: Function Name: test_func
+; STDERR-NEXT: remark: foo.cl:42:0:     SGPRs: 0
+; STDERR-NEXT: remark: foo.cl:42:0:     VGPRs: 0
+; STDERR-NEXT: remark: foo.cl:42:0:     AGPRs: 0
+; STDERR-NEXT: remark: foo.cl:42:0:     ScratchSize [bytes/lane]: 0
+; STDERR-NEXT: remark: foo.cl:42:0:     Occupancy [waves/SIMD]: 0
+; STDERR-NEXT: remark: foo.cl:42:0:     SGPRs Spill: 0
+; STDERR-NEXT: remark: foo.cl:42:0:     VGPRs Spill: 0
+; STDERR-NOT: LDS Size
+define void @test_func() !dbg !6 {
+  call void asm sideeffect "; clobber v17", "~{v17}"()
+  call void asm sideeffect "; clobber s11", "~{s11}"()
+  call void asm sideeffect "; clobber a9", "~{a9}"()
+  ret void
+}
+
+; STDERR: remark: foo.cl:8:0: Function Name: empty_kernel
+; STDERR-NEXT: remark: foo.cl:8:0:     SGPRs: 0
+; STDERR-NEXT: remark: foo.cl:8:0:     VGPRs: 0
+; STDERR-NEXT: remark: foo.cl:8:0:     AGPRs: 0
+; STDERR-NEXT: remark: foo.cl:8:0:     ScratchSize [bytes/lane]: 0
+; STDERR-NEXT: remark: foo.cl:8:0:     Occupancy [waves/SIMD]: 10
+; STDERR-NEXT: remark: foo.cl:8:0:     SGPRs Spill: 0
+; STDERR-NEXT: remark: foo.cl:8:0:     VGPRs Spill: 0
+; STDERR-NEXT: remark: foo.cl:8:0:     LDS Size [bytes/block]: 0
+define amdgpu_kernel void @empty_kernel() !dbg !7 {
+  ret void
+}
+
+; STDERR: remark: foo.cl:52:0: Function Name: empty_func
+; STDERR-NEXT: remark: foo.cl:52:0:     SGPRs: 0
+; STDERR-NEXT: remark: foo.cl:52:0:     VGPRs: 0
+; STDERR-NEXT: remark: foo.cl:52:0:     AGPRs: 0
+; STDERR-NEXT: remark: foo.cl:52:0:     ScratchSize [bytes/lane]: 0
+; STDERR-NEXT: remark: foo.cl:52:0:     Occupancy [waves/SIMD]: 0
+; STDERR-NEXT: remark: foo.cl:52:0:     SGPRs Spill: 0
+; STDERR-NEXT: remark: foo.cl:52:0:     VGPRs Spill: 0
+define void @empty_func() !dbg !8 {
+  ret void
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
+!1 = !DIFile(filename: "foo.cl", directory: "/tmp")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = distinct !DISubprogram(name: "test_kernel", scope: !1, file: !1, type: !4, scopeLine: 27, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
+!4 = !DISubroutineType(types: !5)
+!5 = !{null}
+!6 = distinct !DISubprogram(name: "test_func", scope: !1, file: !1, type: !4, scopeLine: 42, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
+!7 = distinct !DISubprogram(name: "empty_kernel", scope: !1, file: !1, type: !4, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
+!8 = distinct !DISubprogram(name: "empty_func", scope: !1, file: !1, type: !4, scopeLine: 52, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)


        


More information about the llvm-commits mailing list