[clang] [llvm] [AMDGPU] Convert AMDGPUResourceUsageAnalysis pass from Module to MF pass (PR #102913)

via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 12 07:45:21 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-mc

Author: Janek van Oirschot (JanekvO)

<details>
<summary>Changes</summary>

!!! Stacked PR on top of #<!-- -->95951 commit, please only review the latest commit 51f72f115b340a092c2c9f8569911b944a4efb6d !!!!

Converts AMDGPUResourceUsageAnalysis pass from Module to MachineFunction pass. Moves function resource info propagation to to MC layer (through helpers in AMDGPUMCResourceInfo) by generating MCExprs for every function resource which the emitters have been prepped for.

- Function resource info (finalized) validation added through `ValidateMCResourceInfo`. Previously (and currently, still) done in `getSIProgramInfo`. However, MCExprs may not be resolvable during `getSIProgramInfo` due to propagation of yet-to-be-defined function resource info symbols. Does require some caching of the generated occupancy MCExpr computation in `getSIProgramInfo` to validate against.
- Unfortunately, requires module specific `max_num_Xgprs` symbols to be emitted. Cannot generate a separate MCExpr for finding the max without getting recursive symbol defines.
- Most test changes are caused by the representation changes from constant/known values to MCExprs.

---

Patch is 369.85 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/102913.diff


66 Files Affected:

- (modified) clang/test/Frontend/amdgcn-machine-analysis-remarks.cl (+5-5) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (+176-42) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h (+14-5) 
- (added) llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp (+220) 
- (added) llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.h (+94) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp (+22-125) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h (+12-28) 
- (modified) llvm/lib/Target/AMDGPU/CMakeLists.txt (+1) 
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp (+359) 
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h (+11) 
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp (+55-14) 
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h (+23) 
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp (+5-5) 
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp (+20-26) 
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h (+4-1) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll (+84-84) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll (+9-7) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll (+3-3) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/agpr-register-count.ll (+37-28) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll (+2-1) 
- (modified) llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll (+3-1) 
- (modified) llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll (+28-23) 
- (modified) llvm/test/CodeGen/AMDGPU/attributor-noopt.ll (+26-4) 
- (modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll (+10-6) 
- (modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll (+6-2) 
- (modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll (+9-2) 
- (modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll (+9-2) 
- (modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll (+9-2) 
- (modified) llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll (+39-11) 
- (modified) llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/code-object-v3.ll (+24-10) 
- (modified) llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll (+3-3) 
- (modified) llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll (+4-2) 
- (modified) llvm/test/CodeGen/AMDGPU/elf.ll (+4-3) 
- (modified) llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll (+9-5) 
- (added) llvm/test/CodeGen/AMDGPU/function-resource-usage.ll (+533) 
- (modified) llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll (+8-8) 
- (modified) llvm/test/CodeGen/AMDGPU/inline-asm-reserved-regs.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll (+12-12) 
- (modified) llvm/test/CodeGen/AMDGPU/ipra.ll (+7-7) 
- (modified) llvm/test/CodeGen/AMDGPU/kernarg-size.ll (+3-1) 
- (modified) llvm/test/CodeGen/AMDGPU/kernel_code_t_recurse.ll (+6-1) 
- (modified) llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll (+23-8) 
- (modified) llvm/test/CodeGen/AMDGPU/llc-pipeline.ll (+15-30) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll (+3-3) 
- (modified) llvm/test/CodeGen/AMDGPU/lower-module-lds-offsets.ll (+13-13) 
- (modified) llvm/test/CodeGen/AMDGPU/mesa3d.ll (+8-5) 
- (modified) llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll (+49-50) 
- (modified) llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll (+12-8) 
- (modified) llvm/test/CodeGen/AMDGPU/recursion.ll (+18-10) 
- (modified) llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll (+32-32) 
- (modified) llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll (+7-6) 
- (modified) llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll (+90-36) 
- (modified) llvm/test/CodeGen/AMDGPU/tid-kd-xnack-any.ll (+2-1) 
- (modified) llvm/test/CodeGen/AMDGPU/tid-kd-xnack-off.ll (+2-1) 
- (modified) llvm/test/CodeGen/AMDGPU/tid-kd-xnack-on.ll (+2-1) 
- (modified) llvm/test/CodeGen/AMDGPU/trap.ll (+21-12) 
- (modified) llvm/test/MC/AMDGPU/amd_kernel_code_t.s (+14-22) 
- (modified) llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s (+32-32) 
- (modified) llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s (+30-30) 
- (modified) llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s (+29-29) 
- (modified) llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s (+27-27) 
- (modified) llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s (+27-27) 
- (modified) llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s (+23-23) 


``````````diff
diff --git a/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl b/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
index a05e21b37b9127..a2dd59a871904c 100644
--- a/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
+++ b/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
@@ -2,12 +2,12 @@
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx908 -Rpass-analysis=kernel-resource-usage -S -O0 -verify %s -o /dev/null
 
 // expected-remark at +10 {{Function Name: foo}}
-// expected-remark at +9 {{    SGPRs: 13}}
-// expected-remark at +8 {{    VGPRs: 10}}
-// expected-remark at +7 {{    AGPRs: 12}}
-// expected-remark at +6 {{    ScratchSize [bytes/lane]: 0}}
+// expected-remark at +9 {{    SGPRs: foo.num_sgpr+(extrasgprs(foo.uses_vcc, foo.uses_flat_scratch, 1))}}
+// expected-remark at +8 {{    VGPRs: foo.num_vgpr}}
+// expected-remark at +7 {{    AGPRs: foo.num_agpr}}
+// expected-remark at +6 {{    ScratchSize [bytes/lane]: foo.private_seg_size}}
 // expected-remark at +5 {{    Dynamic Stack: False}}
-// expected-remark at +4 {{    Occupancy [waves/SIMD]: 10}}
+// expected-remark at +4 {{    Occupancy [waves/SIMD]: occupancy(10, 4, 256, 8, 10, max(foo.num_sgpr+(extrasgprs(foo.uses_vcc, foo.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(foo.num_agpr, foo.num_vgpr), 1, 0))}}
 // expected-remark at +3 {{    SGPRs Spill: 0}}
 // expected-remark at +2 {{    VGPRs Spill: 0}}
 // expected-remark at +1 {{    LDS Size [bytes/block]: 0}}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index e64e28e01d3d18..97a5cb29d51023 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -18,6 +18,7 @@
 #include "AMDGPUAsmPrinter.h"
 #include "AMDGPU.h"
 #include "AMDGPUHSAMetadataStreamer.h"
+#include "AMDGPUMCResourceInfo.h"
 #include "AMDGPUResourceUsageAnalysis.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUInstPrinter.h"
@@ -92,6 +93,9 @@ AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
                                    std::unique_ptr<MCStreamer> Streamer)
     : AsmPrinter(TM, std::move(Streamer)) {
   assert(OutStreamer && "AsmPrinter constructed without streamer");
+  RI = std::make_unique<MCResourceInfo>(OutContext);
+  OccupancyValidateMap =
+      std::make_unique<DenseMap<const Function *, const MCExpr *>>();
 }
 
 StringRef AMDGPUAsmPrinter::getPassName() const {
@@ -359,6 +363,102 @@ bool AMDGPUAsmPrinter::doInitialization(Module &M) {
   return AsmPrinter::doInitialization(M);
 }
 
+void AMDGPUAsmPrinter::ValidateMCResourceInfo(Function &F) {
+  if (F.isDeclaration() || !AMDGPU::isModuleEntryFunctionCC(F.getCallingConv()))
+    return;
+
+  using RIK = MCResourceInfo::ResourceInfoKind;
+  const GCNSubtarget &STM = TM.getSubtarget<GCNSubtarget>(F);
+
+  auto TryGetMCExprValue = [](const MCExpr *Value, uint64_t &Res) -> bool {
+    int64_t Val;
+    if (Value->evaluateAsAbsolute(Val)) {
+      Res = Val;
+      return true;
+    }
+    return false;
+  };
+
+  const uint64_t MaxScratchPerWorkitem =
+      STM.getMaxWaveScratchSize() / STM.getWavefrontSize();
+  MCSymbol *ScratchSizeSymbol =
+      RI->getSymbol(F.getName(), RIK::RIK_PrivateSegSize);
+  uint64_t ScratchSize;
+  if (ScratchSizeSymbol->isVariable() &&
+      TryGetMCExprValue(ScratchSizeSymbol->getVariableValue(), ScratchSize) &&
+      ScratchSize > MaxScratchPerWorkitem) {
+    DiagnosticInfoStackSize DiagStackSize(F, ScratchSize, MaxScratchPerWorkitem,
+                                          DS_Error);
+    F.getContext().diagnose(DiagStackSize);
+  }
+
+  // Validate addressable scalar registers (i.e., prior to added implicit
+  // SGPRs).
+  MCSymbol *NumSGPRSymbol = RI->getSymbol(F.getName(), RIK::RIK_NumSGPR);
+  if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
+      !STM.hasSGPRInitBug()) {
+    unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
+    uint64_t NumSgpr;
+    if (NumSGPRSymbol->isVariable() &&
+        TryGetMCExprValue(NumSGPRSymbol->getVariableValue(), NumSgpr) &&
+        NumSgpr > MaxAddressableNumSGPRs) {
+      DiagnosticInfoResourceLimit Diag(F, "addressable scalar registers",
+                                       NumSgpr, MaxAddressableNumSGPRs,
+                                       DS_Error, DK_ResourceLimit);
+      F.getContext().diagnose(Diag);
+      return;
+    }
+  }
+
+  MCSymbol *VCCUsedSymbol = RI->getSymbol(F.getName(), RIK::RIK_UsesVCC);
+  MCSymbol *FlatUsedSymbol =
+      RI->getSymbol(F.getName(), RIK::RIK_UsesFlatScratch);
+  uint64_t VCCUsed, FlatUsed, NumSgpr;
+
+  if (NumSGPRSymbol->isVariable() && VCCUsedSymbol->isVariable() &&
+      FlatUsedSymbol->isVariable() &&
+      TryGetMCExprValue(NumSGPRSymbol->getVariableValue(), NumSgpr) &&
+      TryGetMCExprValue(VCCUsedSymbol->getVariableValue(), VCCUsed) &&
+      TryGetMCExprValue(FlatUsedSymbol->getVariableValue(), FlatUsed)) {
+
+    // Recomputes NumSgprs + implicit SGPRs but all symbols should now be
+    // resolvable.
+    NumSgpr += IsaInfo::getNumExtraSGPRs(
+        &STM, VCCUsed, FlatUsed,
+        getTargetStreamer()->getTargetID()->isXnackOnOrAny());
+    if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ||
+        STM.hasSGPRInitBug()) {
+      unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
+      if (NumSgpr > MaxAddressableNumSGPRs) {
+        DiagnosticInfoResourceLimit Diag(F, "scalar registers", NumSgpr,
+                                         MaxAddressableNumSGPRs, DS_Error,
+                                         DK_ResourceLimit);
+        F.getContext().diagnose(Diag);
+        return;
+      }
+    }
+
+    auto I = OccupancyValidateMap->find(&F);
+    if (I != OccupancyValidateMap->end()) {
+      const auto [MinWEU, MaxWEU] = AMDGPU::getIntegerPairAttribute(
+          F, "amdgpu-waves-per-eu", {0, 0}, true);
+      uint64_t Occupancy;
+      const MCExpr *OccupancyExpr = I->getSecond();
+
+      if (TryGetMCExprValue(OccupancyExpr, Occupancy) && Occupancy < MinWEU) {
+        DiagnosticInfoOptimizationFailure Diag(
+            F, F.getSubprogram(),
+            "failed to meet occupancy target given by 'amdgpu-waves-per-eu' in "
+            "'" +
+                F.getName() + "': desired occupancy was " + Twine(MinWEU) +
+                ", final occupancy is " + Twine(Occupancy));
+        F.getContext().diagnose(Diag);
+        return;
+      }
+    }
+  }
+}
+
 bool AMDGPUAsmPrinter::doFinalization(Module &M) {
   // Pad with s_code_end to help tools and guard against instruction prefetch
   // causing stale data in caches. Arguably this should be done by the linker,
@@ -371,39 +471,29 @@ bool AMDGPUAsmPrinter::doFinalization(Module &M) {
     getTargetStreamer()->EmitCodeEnd(STI);
   }
 
-  return AsmPrinter::doFinalization(M);
-}
+  // Assign expressions which can only be resolved when all other functions are
+  // known.
+  RI->Finalize();
+  getTargetStreamer()->EmitMCResourceMaximums(
+      RI->getMaxVGPRSymbol(), RI->getMaxAGPRSymbol(), RI->getMaxSGPRSymbol());
 
-// Print comments that apply to both callable functions and entry points.
-void AMDGPUAsmPrinter::emitCommonFunctionComments(
-    uint32_t NumVGPR, std::optional<uint32_t> NumAGPR, uint32_t TotalNumVGPR,
-    uint32_t NumSGPR, uint64_t ScratchSize, uint64_t CodeSize,
-    const AMDGPUMachineFunction *MFI) {
-  OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false);
-  OutStreamer->emitRawComment(" NumSgprs: " + Twine(NumSGPR), false);
-  OutStreamer->emitRawComment(" NumVgprs: " + Twine(NumVGPR), false);
-  if (NumAGPR) {
-    OutStreamer->emitRawComment(" NumAgprs: " + Twine(*NumAGPR), false);
-    OutStreamer->emitRawComment(" TotalNumVgprs: " + Twine(TotalNumVGPR),
-                                false);
+  for (Function &F : M.functions()) {
+    ValidateMCResourceInfo(F);
   }
-  OutStreamer->emitRawComment(" ScratchSize: " + Twine(ScratchSize), false);
-  OutStreamer->emitRawComment(" MemoryBound: " + Twine(MFI->isMemoryBound()),
-                              false);
+  return AsmPrinter::doFinalization(M);
 }
 
 SmallString<128> AMDGPUAsmPrinter::getMCExprStr(const MCExpr *Value) {
   SmallString<128> Str;
   raw_svector_ostream OSS(Str);
-  int64_t IVal;
-  if (Value->evaluateAsAbsolute(IVal)) {
-    OSS << static_cast<uint64_t>(IVal);
-  } else {
-    Value->print(OSS, MAI);
-  }
+  auto &Streamer = getTargetStreamer()->getStreamer();
+  auto &Context = Streamer.getContext();
+  const MCExpr *New = llvm::TryFold(Value, Context);
+  AMDGPUMCExprPrint(New, OSS, MAI);
   return Str;
 }
 
+// Print comments that apply to both callable functions and entry points.
 void AMDGPUAsmPrinter::emitCommonFunctionComments(
     const MCExpr *NumVGPR, const MCExpr *NumAGPR, const MCExpr *TotalNumVGPR,
     const MCExpr *NumSGPR, const MCExpr *ScratchSize, uint64_t CodeSize,
@@ -573,21 +663,45 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   emitResourceUsageRemarks(MF, CurrentProgramInfo, MFI->isModuleEntryFunction(),
                            STM.hasMAIInsts());
 
+  {
+    const AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo &Info =
+        ResourceUsage->getResourceInfo();
+    RI->gatherResourceInfo(MF, Info);
+    using RIK = MCResourceInfo::ResourceInfoKind;
+    getTargetStreamer()->EmitMCResourceInfo(
+        RI->getSymbol(MF.getName(), RIK::RIK_NumVGPR),
+        RI->getSymbol(MF.getName(), RIK::RIK_NumAGPR),
+        RI->getSymbol(MF.getName(), RIK::RIK_NumSGPR),
+        RI->getSymbol(MF.getName(), RIK::RIK_PrivateSegSize),
+        RI->getSymbol(MF.getName(), RIK::RIK_UsesVCC),
+        RI->getSymbol(MF.getName(), RIK::RIK_UsesFlatScratch),
+        RI->getSymbol(MF.getName(), RIK::RIK_HasDynSizedStack),
+        RI->getSymbol(MF.getName(), RIK::RIK_HasRecursion),
+        RI->getSymbol(MF.getName(), RIK::RIK_HasIndirectCall));
+  }
+
   if (isVerbose()) {
     MCSectionELF *CommentSection =
         Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
     OutStreamer->switchSection(CommentSection);
 
     if (!MFI->isEntryFunction()) {
+      using RIK = MCResourceInfo::ResourceInfoKind;
       OutStreamer->emitRawComment(" Function info:", false);
-      const AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo &Info =
-          ResourceUsage->getResourceInfo(&MF.getFunction());
+
       emitCommonFunctionComments(
-          Info.NumVGPR,
-          STM.hasMAIInsts() ? Info.NumAGPR : std::optional<uint32_t>(),
-          Info.getTotalNumVGPRs(STM),
-          Info.getTotalNumSGPRs(MF.getSubtarget<GCNSubtarget>()),
-          Info.PrivateSegmentSize, getFunctionCodeSize(MF), MFI);
+          RI->getSymbol(MF.getName(), RIK::RIK_NumVGPR)->getVariableValue(),
+          STM.hasMAIInsts() ? RI->getSymbol(MF.getName(), RIK::RIK_NumAGPR)
+                                  ->getVariableValue()
+                            : nullptr,
+          RI->createTotalNumVGPRs(MF, Ctx),
+          RI->createTotalNumSGPRs(
+              MF,
+              MF.getSubtarget<GCNSubtarget>().getTargetID().isXnackOnOrAny(),
+              Ctx),
+          RI->getSymbol(MF.getName(), RIK::RIK_PrivateSegSize)
+              ->getVariableValue(),
+          getFunctionCodeSize(MF), MFI);
       return false;
     }
 
@@ -755,8 +869,6 @@ uint64_t AMDGPUAsmPrinter::getFunctionCodeSize(const MachineFunction &MF) const
 
 void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
                                         const MachineFunction &MF) {
-  const AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo &Info =
-      ResourceUsage->getResourceInfo(&MF.getFunction());
   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
   MCContext &Ctx = MF.getContext();
 
@@ -773,18 +885,38 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
     return false;
   };
 
-  ProgInfo.NumArchVGPR = CreateExpr(Info.NumVGPR);
-  ProgInfo.NumAccVGPR = CreateExpr(Info.NumAGPR);
-  ProgInfo.NumVGPR = CreateExpr(Info.getTotalNumVGPRs(STM));
-  ProgInfo.AccumOffset =
-      CreateExpr(alignTo(std::max(1, Info.NumVGPR), 4) / 4 - 1);
+  auto GetSymRefExpr =
+      [&](MCResourceInfo::ResourceInfoKind RIK) -> const MCExpr * {
+    MCSymbol *Sym = RI->getSymbol(MF.getName(), RIK);
+    return MCSymbolRefExpr::create(Sym, Ctx);
+  };
+
+  const MCExpr *ConstFour = MCConstantExpr::create(4, Ctx);
+  const MCExpr *ConstOne = MCConstantExpr::create(1, Ctx);
+
+  using RIK = MCResourceInfo::ResourceInfoKind;
+  ProgInfo.NumArchVGPR = GetSymRefExpr(RIK::RIK_NumVGPR);
+  ProgInfo.NumAccVGPR = GetSymRefExpr(RIK::RIK_NumAGPR);
+  ProgInfo.NumVGPR = AMDGPUMCExpr::createTotalNumVGPR(
+      ProgInfo.NumAccVGPR, ProgInfo.NumArchVGPR, Ctx);
+
+  // AccumOffset computed for the MCExpr equivalent of:
+  // alignTo(std::max(1, Info.NumVGPR), 4) / 4 - 1;
+  ProgInfo.AccumOffset = MCBinaryExpr::createSub(
+      MCBinaryExpr::createDiv(
+          AMDGPUMCExpr::createAlignTo(
+              AMDGPUMCExpr::createMax({ConstOne, ProgInfo.NumArchVGPR}, Ctx),
+              ConstFour, Ctx),
+          ConstFour, Ctx),
+      ConstOne, Ctx);
   ProgInfo.TgSplit = STM.isTgSplitEnabled();
-  ProgInfo.NumSGPR = CreateExpr(Info.NumExplicitSGPR);
-  ProgInfo.ScratchSize = CreateExpr(Info.PrivateSegmentSize);
-  ProgInfo.VCCUsed = CreateExpr(Info.UsesVCC);
-  ProgInfo.FlatUsed = CreateExpr(Info.UsesFlatScratch);
+  ProgInfo.NumSGPR = GetSymRefExpr(RIK::RIK_NumSGPR);
+  ProgInfo.ScratchSize = GetSymRefExpr(RIK::RIK_PrivateSegSize);
+  ProgInfo.VCCUsed = GetSymRefExpr(RIK::RIK_UsesVCC);
+  ProgInfo.FlatUsed = GetSymRefExpr(RIK::RIK_UsesFlatScratch);
   ProgInfo.DynamicCallStack =
-      CreateExpr(Info.HasDynamicallySizedStack || Info.HasRecursion);
+      MCBinaryExpr::createOr(GetSymRefExpr(RIK::RIK_HasDynSizedStack),
+                             GetSymRefExpr(RIK::RIK_HasRecursion), Ctx);
 
   const uint64_t MaxScratchPerWorkitem =
       STM.getMaxWaveScratchSize() / STM.getWavefrontSize();
@@ -1084,6 +1216,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
       STM.computeOccupancy(F, ProgInfo.LDSSize), ProgInfo.NumSGPRsForWavesPerEU,
       ProgInfo.NumVGPRsForWavesPerEU, STM, Ctx);
 
+  OccupancyValidateMap->insert({&MF.getFunction(), ProgInfo.Occupancy});
+
   const auto [MinWEU, MaxWEU] =
       AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", {0, 0}, true);
   uint64_t Occupancy;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index f66bbde42ce278..676a4687ee2af7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -24,6 +24,7 @@ struct AMDGPUResourceUsageAnalysis;
 class AMDGPUTargetStreamer;
 class MCCodeEmitter;
 class MCOperand;
+class MCResourceInfo;
 
 namespace AMDGPU {
 struct MCKernelDescriptor;
@@ -40,12 +41,20 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
 
   AMDGPUResourceUsageAnalysis *ResourceUsage;
 
+  std::unique_ptr<MCResourceInfo> RI;
+
   SIProgramInfo CurrentProgramInfo;
 
   std::unique_ptr<AMDGPU::HSAMD::MetadataStreamer> HSAMetadataStream;
 
   MCCodeEmitter *DumpCodeInstEmitter = nullptr;
 
+  // ValidateMCResourceInfo cannot recompute parts of the occupancy as it does
+  // for other metadata to validate (e.g., NumSGPRs) so a map is necessary if we
+  // really want to track and validate the occupancy.
+  std::unique_ptr<DenseMap<const Function *, const MCExpr *>>
+      OccupancyValidateMap;
+
   uint64_t getFunctionCodeSize(const MachineFunction &MF) const;
 
   void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF);
@@ -60,11 +69,6 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
   void EmitPALMetadata(const MachineFunction &MF,
                        const SIProgramInfo &KernelInfo);
   void emitPALFunctionMetadata(const MachineFunction &MF);
-  void emitCommonFunctionComments(uint32_t NumVGPR,
-                                  std::optional<uint32_t> NumAGPR,
-                                  uint32_t TotalNumVGPR, uint32_t NumSGPR,
-                                  uint64_t ScratchSize, uint64_t CodeSize,
-                                  const AMDGPUMachineFunction *MFI);
   void emitCommonFunctionComments(const MCExpr *NumVGPR, const MCExpr *NumAGPR,
                                   const MCExpr *TotalNumVGPR,
                                   const MCExpr *NumSGPR,
@@ -84,6 +88,11 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
 
   SmallString<128> getMCExprStr(const MCExpr *Value);
 
+  /// Attempts to replace the validation that is missed in getSIProgramInfo due
+  /// to MCExpr being unknown. Invoked during doFinalization such that the
+  /// MCResourceInfo symbols are known.
+  void ValidateMCResourceInfo(Function &F);
+
 public:
   explicit AMDGPUAsmPrinter(TargetMachine &TM,
                             std::unique_ptr<MCStreamer> Streamer);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
new file mode 100644
index 00000000000000..58383475b312c9
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
@@ -0,0 +1,220 @@
+//===- AMDGPUMCResourceInfo.cpp --- MC Resource Info ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief MC infrastructure to propagate the function level resource usage
+/// info.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCResourceInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+
+using namespace llvm;
+
+MCSymbol *MCResourceInfo::getSymbol(StringRef FuncName, ResourceInfoKind RIK) {
+  switch (RIK) {
+  case RIK_NumVGPR:
+    return OutContext.getOrCreateSymbol(FuncName + Twine(".num_vgpr"));
+  case RIK_NumAGPR:
+    return OutContext.getOrCreateSymbol(FuncName + Twine(".num_agpr"));
+  case RIK_NumSGPR:
+    return OutContext.getOrCreateSymbol(FuncName + Twine(".num_sgpr"));
+  case RIK_PrivateSegSize:
+    return OutContext.getOrCreateSymbol(FuncName + Twine(".private_seg_size"));
+  case RIK_UsesVCC:
+    return OutContext.getOrCreateSymbol(FuncName + Twine(".uses_vcc"));
+  case RIK_UsesFlatScratch:
+    return OutContext.getOrCreateSymbol(FuncName + Twine(".uses_flat_scratch"));
+  case RIK_HasDynSizedStack:
+    return OutContext.getOrCreateSymbol(FuncName +
+                                        Twine(".has_dyn_sized_stack"));
+  case RIK_HasRecursion:
+    return OutContext.getOrCreateSymbol(FuncName + Twine(".has_recursion"));
+  case RIK_HasIndirectCall:
+    return OutContext.getOrCreateSymbol(FuncName + Twine(".has_indirect_call"));
+  }
+  llvm_unreachable("Unexpected ResourceInfoKind.");
+}
+
+const MCExpr *MCResourceInfo::getSymRefExpr(StringRef FuncName,
+                                            ResourceInfoKind RIK,
+                                            MCContext &Ctx) {
+  return MCSymbolRefExpr::create(getSymbol(FuncName, RIK), Ctx);
+}
+
+void MCResourceInfo::assignMaxRegs() {
+  // Assign expression to get the max register use to the max_num_Xgpr symbol.
+  MCSymbol *MaxVGPRSym = getMaxVGPRSymbol();
+  MCSymbol *MaxAGPRSym = getMaxAGPRSymbol();
+  MCSymbol *MaxSGPRSym = getMaxSGPRSymbol();
+
+  auto assignMaxRegSym = [this](MCSymbol *Sym, int32_t RegCount) {
+    const MCExpr *MaxExpr = MCConstantExpr::create(RegCount, OutContext);
+    Sym->setVariableValue(MaxExpr);
+  };
+
+  assignMaxRegSym(MaxVGPRSym, MaxVGPR);
+  assignMaxRegSym(MaxAGPRSym, MaxAGPR);
+  assignMaxRegSym(MaxSGPRSym, MaxSGPR);
+}
+
+void MCResourceInfo::Finalize() {
+  assert(!finalized && "Cannot finalize ResourceInfo again.");
+  finalized = true;
+  assignMaxRegs();
+}
+
+MCSymbol *MCResourceInfo::getMaxVGPRSymbol() {
+  return OutContext.getOrCreateSymbol("max_num_vgpr");
+}
+
+MCSymbol *MCResourceInfo::getMaxAGPRSymbol() {
+  return OutContext.getOrCreateSymbol("max_num_agpr");
+}
+
+MCSymbol *MCResourceInfo::getMaxSGPRSymbol() {
+  return OutContext.getOrCreateSymbol("max_num_sgpr");
+}
+
+void MCResourceInfo::assignResourceInfoExpr(
+   ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/102913


More information about the llvm-commits mailing list