[llvm] MCExpr-ify SIProgramInfo (PR #88257)

via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 10 04:49:47 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Janek van Oirschot (JanekvO)

<details>
<summary>Changes</summary>

All members in SIProgramInfo that are affected by variables provided by AMDGPUResourceUsageAnalysis need to become MCExpr for when AMDGPUResourceUsageAnalysis' resource info propagation is done in the MC layer, through MCExprs. Additionally, some operations done on said resource info to create/populate some of the SIProgramInfo members now have become custom MCExpr operations (e.g., occupancy compute, totalnumvgpr, etc.).

---

Patch is 68.41 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/88257.diff


14 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (+265-131) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h (+2) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp (+16-6) 
- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+7-1) 
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp (+221) 
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h (+34-1) 
- (modified) llvm/lib/Target/AMDGPU/SIProgramInfo.cpp (+175-32) 
- (modified) llvm/lib/Target/AMDGPU/SIProgramInfo.h (+28-17) 
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+9-3) 
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+6) 
- (added) llvm/test/MC/AMDGPU/alignto_mcexpr.s (+15) 
- (added) llvm/test/MC/AMDGPU/extrasgprs_mcexpr.s (+41) 
- (added) llvm/test/MC/AMDGPU/occupancy_mcexpr.s (+61) 
- (added) llvm/test/MC/AMDGPU/totalnumvgpr_mcexpr.s (+25) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 052b231d62a3eb..b410f0c13e1b49 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -22,6 +22,7 @@
 #include "AMDKernelCodeT.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUInstPrinter.h"
+#include "MCTargetDesc/AMDGPUMCExpr.h"
 #include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
 #include "R600AsmPrinter.h"
@@ -134,6 +135,12 @@ void AMDGPUAsmPrinter::initTargetStreamer(Module &M) {
     getTargetStreamer()->getPALMetadata()->readFromIR(M);
 }
 
+uint64_t AMDGPUAsmPrinter::getMCExprValue(const MCExpr *Value) {
+  int64_t Val;
+  Value->evaluateAsAbsolute(Val);
+  return Val;
+}
+
 void AMDGPUAsmPrinter::emitEndOfAsmFile(Module &M) {
   // Init target streamer if it has not yet happened
   if (!IsTargetStreamerInitialized)
@@ -237,12 +244,14 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
   getNameWithPrefix(KernelName, &MF->getFunction());
   getTargetStreamer()->EmitAmdhsaKernelDescriptor(
       STM, KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
-      CurrentProgramInfo.NumVGPRsForWavesPerEU,
-      CurrentProgramInfo.NumSGPRsForWavesPerEU -
+      getMCExprValue(CurrentProgramInfo.NumVGPRsForWavesPerEU),
+      getMCExprValue(CurrentProgramInfo.NumSGPRsForWavesPerEU) -
           IsaInfo::getNumExtraSGPRs(
-              &STM, CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
+              &STM, getMCExprValue(CurrentProgramInfo.VCCUsed),
+              getMCExprValue(CurrentProgramInfo.FlatUsed),
               getTargetStreamer()->getTargetID()->isXnackOnOrAny()),
-      CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed);
+      getMCExprValue(CurrentProgramInfo.VCCUsed),
+      getMCExprValue(CurrentProgramInfo.FlatUsed));
 
   Streamer.popSection();
 }
@@ -422,7 +431,7 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
         amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
   }
 
-  if (CurrentProgramInfo.DynamicCallStack &&
+  if (getMCExprValue(CurrentProgramInfo.DynamicCallStack) &&
       CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
     KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK;
 
@@ -439,29 +448,22 @@ AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(const MachineFunction &MF,
 
   MCKernelDescriptor KernelDescriptor;
 
-  assert(isUInt<32>(PI.ScratchSize));
-  assert(isUInt<32>(PI.getComputePGMRSrc1(STM)));
-  assert(isUInt<32>(PI.getComputePGMRSrc2()));
-
   KernelDescriptor.group_segment_fixed_size =
       MCConstantExpr::create(PI.LDSSize, Ctx);
-  KernelDescriptor.private_segment_fixed_size =
-      MCConstantExpr::create(PI.ScratchSize, Ctx);
+  KernelDescriptor.private_segment_fixed_size = PI.ScratchSize;
 
   Align MaxKernArgAlign;
   KernelDescriptor.kernarg_size = MCConstantExpr::create(
       STM.getKernArgSegmentSize(F, MaxKernArgAlign), Ctx);
 
-  KernelDescriptor.compute_pgm_rsrc1 =
-      MCConstantExpr::create(PI.getComputePGMRSrc1(STM), Ctx);
-  KernelDescriptor.compute_pgm_rsrc2 =
-      MCConstantExpr::create(PI.getComputePGMRSrc2(), Ctx);
+  KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1(STM, Ctx);
+  KernelDescriptor.compute_pgm_rsrc2 = PI.getComputePGMRSrc2(Ctx);
   KernelDescriptor.kernel_code_properties =
       MCConstantExpr::create(getAmdhsaKernelCodeProperties(MF), Ctx);
 
-  assert(STM.hasGFX90AInsts() || CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0);
-  KernelDescriptor.compute_pgm_rsrc3 = MCConstantExpr::create(
-      STM.hasGFX90AInsts() ? CurrentProgramInfo.ComputePGMRSrc3GFX90A : 0, Ctx);
+  assert(STM.hasGFX90AInsts() ||
+         getMCExprValue(CurrentProgramInfo.ComputePGMRSrc3GFX90A) == 0);
+  KernelDescriptor.compute_pgm_rsrc3 = CurrentProgramInfo.ComputePGMRSrc3GFX90A;
 
   KernelDescriptor.kernarg_preload = MCConstantExpr::create(
       AMDGPU::hasKernargPreload(STM) ? Info->getNumKernargPreloadedSGPRs() : 0,
@@ -477,7 +479,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
     initTargetStreamer(*MF.getFunction().getParent());
 
   ResourceUsage = &getAnalysis<AMDGPUResourceUsageAnalysis>();
-  CurrentProgramInfo = SIProgramInfo();
+  CurrentProgramInfo.reset(MF);
 
   const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
 
@@ -550,11 +552,13 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
 
     OutStreamer->emitRawComment(" Kernel info:", false);
     emitCommonFunctionComments(
-        CurrentProgramInfo.NumArchVGPR,
-        STM.hasMAIInsts() ? CurrentProgramInfo.NumAccVGPR
+        getMCExprValue(CurrentProgramInfo.NumArchVGPR),
+        STM.hasMAIInsts() ? getMCExprValue(CurrentProgramInfo.NumAccVGPR)
                           : std::optional<uint32_t>(),
-        CurrentProgramInfo.NumVGPR, CurrentProgramInfo.NumSGPR,
-        CurrentProgramInfo.ScratchSize, getFunctionCodeSize(MF), MFI);
+        getMCExprValue(CurrentProgramInfo.NumVGPR),
+        getMCExprValue(CurrentProgramInfo.NumSGPR),
+        getMCExprValue(CurrentProgramInfo.ScratchSize), getFunctionCodeSize(MF),
+        MFI);
 
     OutStreamer->emitRawComment(
       " FloatMode: " + Twine(CurrentProgramInfo.FloatMode), false);
@@ -565,32 +569,38 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
       " bytes/workgroup (compile time only)", false);
 
     OutStreamer->emitRawComment(
-      " SGPRBlocks: " + Twine(CurrentProgramInfo.SGPRBlocks), false);
+        " SGPRBlocks: " + Twine(getMCExprValue(CurrentProgramInfo.SGPRBlocks)),
+        false);
     OutStreamer->emitRawComment(
-      " VGPRBlocks: " + Twine(CurrentProgramInfo.VGPRBlocks), false);
+        " VGPRBlocks: " + Twine(getMCExprValue(CurrentProgramInfo.VGPRBlocks)),
+        false);
 
     OutStreamer->emitRawComment(
-      " NumSGPRsForWavesPerEU: " +
-      Twine(CurrentProgramInfo.NumSGPRsForWavesPerEU), false);
+        " NumSGPRsForWavesPerEU: " +
+            Twine(getMCExprValue(CurrentProgramInfo.NumSGPRsForWavesPerEU)),
+        false);
     OutStreamer->emitRawComment(
-      " NumVGPRsForWavesPerEU: " +
-      Twine(CurrentProgramInfo.NumVGPRsForWavesPerEU), false);
+        " NumVGPRsForWavesPerEU: " +
+            Twine(getMCExprValue(CurrentProgramInfo.NumVGPRsForWavesPerEU)),
+        false);
 
     if (STM.hasGFX90AInsts())
       OutStreamer->emitRawComment(
-        " AccumOffset: " +
-        Twine((CurrentProgramInfo.AccumOffset + 1) * 4), false);
+          " AccumOffset: " +
+              Twine((getMCExprValue(CurrentProgramInfo.AccumOffset) + 1) * 4),
+          false);
 
     OutStreamer->emitRawComment(
-      " Occupancy: " +
-      Twine(CurrentProgramInfo.Occupancy), false);
+        " Occupancy: " + Twine(getMCExprValue(CurrentProgramInfo.Occupancy)),
+        false);
 
     OutStreamer->emitRawComment(
       " WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);
 
-    OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:SCRATCH_EN: " +
-                                    Twine(CurrentProgramInfo.ScratchEnable),
-                                false);
+    OutStreamer->emitRawComment(
+        " COMPUTE_PGM_RSRC2:SCRATCH_EN: " +
+            Twine(getMCExprValue(CurrentProgramInfo.ScratchEnable)),
+        false);
     OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +
                                     Twine(CurrentProgramInfo.UserSGPR),
                                 false);
@@ -611,18 +621,20 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
                                 false);
 
     assert(STM.hasGFX90AInsts() ||
-           CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0);
+           getMCExprValue(CurrentProgramInfo.ComputePGMRSrc3GFX90A) == 0);
     if (STM.hasGFX90AInsts()) {
       OutStreamer->emitRawComment(
-        " COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " +
-        Twine((AMDHSA_BITS_GET(CurrentProgramInfo.ComputePGMRSrc3GFX90A,
-                               amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET))),
-                               false);
+          " COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " +
+              Twine((AMDHSA_BITS_GET(
+                  getMCExprValue(CurrentProgramInfo.ComputePGMRSrc3GFX90A),
+                  amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET))),
+          false);
       OutStreamer->emitRawComment(
-        " COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " +
-        Twine((AMDHSA_BITS_GET(CurrentProgramInfo.ComputePGMRSrc3GFX90A,
-                               amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT))),
-                               false);
+          " COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " +
+              Twine((AMDHSA_BITS_GET(
+                  getMCExprValue(CurrentProgramInfo.ComputePGMRSrc3GFX90A),
+                  amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT))),
+          false);
     }
   }
 
@@ -702,23 +714,40 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
   const AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo &Info =
       ResourceUsage->getResourceInfo(&MF.getFunction());
   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
+  MCContext &Ctx = MF.getContext();
+
+  auto CreateExpr = [&Ctx](int64_t Value) {
+    return MCConstantExpr::create(Value, Ctx);
+  };
 
-  ProgInfo.NumArchVGPR = Info.NumVGPR;
-  ProgInfo.NumAccVGPR = Info.NumAGPR;
-  ProgInfo.NumVGPR = Info.getTotalNumVGPRs(STM);
-  ProgInfo.AccumOffset = alignTo(std::max(1, Info.NumVGPR), 4) / 4 - 1;
+  auto TryGetMCExprValue = [&Ctx](const MCExpr *Value, uint64_t &Res) -> bool {
+    int64_t Val;
+    if (Value->evaluateAsAbsolute(Val)) {
+      Res = Val;
+      return true;
+    } else
+      return false;
+  };
+
+  ProgInfo.NumArchVGPR = CreateExpr(Info.NumVGPR);
+  ProgInfo.NumAccVGPR = CreateExpr(Info.NumAGPR);
+  ProgInfo.NumVGPR = CreateExpr(Info.getTotalNumVGPRs(STM));
+  ProgInfo.AccumOffset =
+      CreateExpr(alignTo(std::max(1, Info.NumVGPR), 4) / 4 - 1);
   ProgInfo.TgSplit = STM.isTgSplitEnabled();
-  ProgInfo.NumSGPR = Info.NumExplicitSGPR;
-  ProgInfo.ScratchSize = Info.PrivateSegmentSize;
-  ProgInfo.VCCUsed = Info.UsesVCC;
-  ProgInfo.FlatUsed = Info.UsesFlatScratch;
-  ProgInfo.DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion;
+  ProgInfo.NumSGPR = CreateExpr(Info.NumExplicitSGPR);
+  ProgInfo.ScratchSize = CreateExpr(Info.PrivateSegmentSize);
+  ProgInfo.VCCUsed = CreateExpr(Info.UsesVCC);
+  ProgInfo.FlatUsed = CreateExpr(Info.UsesFlatScratch);
+  ProgInfo.DynamicCallStack =
+      CreateExpr(Info.HasDynamicallySizedStack || Info.HasRecursion);
 
   const uint64_t MaxScratchPerWorkitem =
       STM.getMaxWaveScratchSize() / STM.getWavefrontSize();
-  if (ProgInfo.ScratchSize > MaxScratchPerWorkitem) {
-    DiagnosticInfoStackSize DiagStackSize(MF.getFunction(),
-                                          ProgInfo.ScratchSize,
+  uint64_t ScratchSize;
+  if (TryGetMCExprValue(ProgInfo.ScratchSize, ScratchSize) &&
+      ScratchSize > MaxScratchPerWorkitem) {
+    DiagnosticInfoStackSize DiagStackSize(MF.getFunction(), ScratchSize,
                                           MaxScratchPerWorkitem, DS_Error);
     MF.getFunction().getContext().diagnose(DiagStackSize);
   }
@@ -728,27 +757,30 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
   // The calculations related to SGPR/VGPR blocks are
   // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
   // unified.
-  unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
-      &STM, ProgInfo.VCCUsed, ProgInfo.FlatUsed,
-      getTargetStreamer()->getTargetID()->isXnackOnOrAny());
+  const MCExpr *ExtraSGPRs = AMDGPUVariadicMCExpr::createExtraSGPRs(
+      ProgInfo.VCCUsed, ProgInfo.FlatUsed, getIsaVersion(STM.getCPU()).Major,
+      STM.getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch),
+      getTargetStreamer()->getTargetID()->isXnackOnOrAny(), Ctx);
 
   // Check the addressable register limit before we add ExtraSGPRs.
   if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
       !STM.hasSGPRInitBug()) {
     unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
-    if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
+    uint64_t NumSgpr;
+    if (TryGetMCExprValue(ProgInfo.NumSGPR, NumSgpr) &&
+        NumSgpr > MaxAddressableNumSGPRs) {
       // This can happen due to a compiler bug or when using inline asm.
       LLVMContext &Ctx = MF.getFunction().getContext();
       DiagnosticInfoResourceLimit Diag(
-          MF.getFunction(), "addressable scalar registers", ProgInfo.NumSGPR,
+          MF.getFunction(), "addressable scalar registers", NumSgpr,
           MaxAddressableNumSGPRs, DS_Error, DK_ResourceLimit);
       Ctx.diagnose(Diag);
-      ProgInfo.NumSGPR = MaxAddressableNumSGPRs - 1;
+      ProgInfo.NumSGPR = CreateExpr(MaxAddressableNumSGPRs - 1);
     }
   }
 
   // Account for extra SGPRs and VGPRs reserved for debugger use.
-  ProgInfo.NumSGPR += ExtraSGPRs;
+  ProgInfo.NumSGPR = MCBinaryExpr::createAdd(ProgInfo.NumSGPR, ExtraSGPRs, Ctx);
 
   const Function &F = MF.getFunction();
 
@@ -819,40 +851,50 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
         }
       }
     }
-    ProgInfo.NumSGPR = std::max(ProgInfo.NumSGPR, WaveDispatchNumSGPR);
-    ProgInfo.NumArchVGPR = std::max(ProgInfo.NumVGPR, WaveDispatchNumVGPR);
-    ProgInfo.NumVGPR =
-        Info.getTotalNumVGPRs(STM, Info.NumAGPR, ProgInfo.NumArchVGPR);
+    ProgInfo.NumSGPR = AMDGPUVariadicMCExpr::createMax(
+        {ProgInfo.NumSGPR, CreateExpr(WaveDispatchNumSGPR)}, Ctx);
+
+    ProgInfo.NumArchVGPR = AMDGPUVariadicMCExpr::createMax(
+        {ProgInfo.NumVGPR, CreateExpr(WaveDispatchNumVGPR)}, Ctx);
+
+    ProgInfo.NumVGPR = AMDGPUVariadicMCExpr::createTotalNumVGPR(
+        STM.hasGFX90AInsts(), ProgInfo.NumAccVGPR, ProgInfo.NumArchVGPR, Ctx);
   }
 
   // Adjust number of registers used to meet default/requested minimum/maximum
   // number of waves per execution unit request.
-  ProgInfo.NumSGPRsForWavesPerEU = std::max(
-    std::max(ProgInfo.NumSGPR, 1u), STM.getMinNumSGPRs(MFI->getMaxWavesPerEU()));
-  ProgInfo.NumVGPRsForWavesPerEU = std::max(
-    std::max(ProgInfo.NumVGPR, 1u), STM.getMinNumVGPRs(MFI->getMaxWavesPerEU()));
+  ProgInfo.NumSGPRsForWavesPerEU = AMDGPUVariadicMCExpr::createMax(
+      {ProgInfo.NumSGPR, CreateExpr(1ul),
+       CreateExpr(STM.getMinNumSGPRs(MFI->getMaxWavesPerEU()))},
+      Ctx);
+  ProgInfo.NumVGPRsForWavesPerEU = AMDGPUVariadicMCExpr::createMax(
+      {ProgInfo.NumVGPR, CreateExpr(1ul),
+       CreateExpr(STM.getMinNumVGPRs(MFI->getMaxWavesPerEU()))},
+      Ctx);
 
   if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ||
       STM.hasSGPRInitBug()) {
     unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
-    if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
+    uint64_t NumSgpr;
+    if (TryGetMCExprValue(ProgInfo.NumSGPR, NumSgpr) &&
+        NumSgpr > MaxAddressableNumSGPRs) {
       // This can happen due to a compiler bug or when using inline asm to use
       // the registers which are usually reserved for vcc etc.
       LLVMContext &Ctx = MF.getFunction().getContext();
       DiagnosticInfoResourceLimit Diag(MF.getFunction(), "scalar registers",
-                                       ProgInfo.NumSGPR, MaxAddressableNumSGPRs,
+                                       NumSgpr, MaxAddressableNumSGPRs,
                                        DS_Error, DK_ResourceLimit);
       Ctx.diagnose(Diag);
-      ProgInfo.NumSGPR = MaxAddressableNumSGPRs;
-      ProgInfo.NumSGPRsForWavesPerEU = MaxAddressableNumSGPRs;
+      ProgInfo.NumSGPR = CreateExpr(MaxAddressableNumSGPRs);
+      ProgInfo.NumSGPRsForWavesPerEU = CreateExpr(MaxAddressableNumSGPRs);
     }
   }
 
   if (STM.hasSGPRInitBug()) {
     ProgInfo.NumSGPR =
-        AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
+        CreateExpr(AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG);
     ProgInfo.NumSGPRsForWavesPerEU =
-        AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
+        CreateExpr(AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG);
   }
 
   if (MFI->getNumUserSGPRs() > STM.getMaxNumUserSGPRs()) {
@@ -871,11 +913,26 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
         STM.getAddressableLocalMemorySize(), DS_Error);
     Ctx.diagnose(Diag);
   }
+  // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
+  // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
+  auto GetNumGPRBlocks = [&CreateExpr, &Ctx](const MCExpr *NumGPR,
+                                             unsigned Granule) {
+    const MCExpr *OneConst = CreateExpr(1ul);
+    const MCExpr *GranuleConst = CreateExpr(Granule);
+    const MCExpr *MaxNumGPR =
+        AMDGPUVariadicMCExpr::createMax({NumGPR, OneConst}, Ctx);
+    const MCExpr *AlignToGPR =
+        AMDGPUVariadicMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
+    const MCExpr *DivGPR =
+        MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
+    const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
+    return SubGPR;
+  };
 
-  ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks(
-      &STM, ProgInfo.NumSGPRsForWavesPerEU);
-  ProgInfo.VGPRBlocks =
-      IsaInfo::getEncodedNumVGPRBlocks(&STM, ProgInfo.NumVGPRsForWavesPerEU);
+  ProgInfo.SGPRBlocks = GetNumGPRBlocks(ProgInfo.NumSGPRsForWavesPerEU,
+                                        IsaInfo::getSGPREncodingGranule(&STM));
+  ProgInfo.VGPRBlocks = GetNumGPRBlocks(ProgInfo.NumVGPRsForWavesPerEU,
+                                        IsaInfo::getVGPREncodingGranule(&STM));
 
   const SIModeRegisterDefaults Mode = MFI->getMode();
 
@@ -904,14 +961,23 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
   ProgInfo.LDSBlocks =
       alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
 
+  // The MCExpr equivalent of divideCeil.
+  auto DivideCeil = [&Ctx](const MCExpr *Numerator, const MCExpr *Denominator) {
+    const MCExpr *Ceil =
+        AMDGPUVariadicMCExpr::createAlignTo(Numerator, Denominator, Ctx);
+    return MCBinaryExpr::createDiv(Ceil, Denominator, Ctx);
+  };
+
   // Scratch is allocated in 64-dword or 256-dword blocks.
   unsigned ScratchAlignShift =
       STM.getGeneration() >= AMDGPUSubtarget::GFX11 ? 8 : 10;
   // We need to program the hardware with the amount of scratch memory that
   // is used by the entire wave.  ProgInfo.ScratchSize is the amount of
   // scratch memory used per thread.
-  ProgInfo.ScratchBlocks = divideCeil(
-      ProgInfo.ScratchSize * STM.getWavefrontSize(), 1ULL << ScratchAlignShift);
+  ProgInfo.ScratchBlocks = DivideCeil(
+      MCBinaryExpr::createMul(ProgInfo.ScratchSize,
+                              CreateExpr(STM.getWavefrontSize()), Ctx),
+      CreateExpr(1ULL << ScratchAlignShift));
 
   if (getIsaVersion(getGlobalSTI()->getCPU()).Major >= 10) {
     ProgInfo.WgpMode = STM.isCuModeEnabled() ? 0 : 1;
@@ -930,8 +996,11 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
   // anything to disable it if we know the stack isn't used here. We may still
   // have emitted code reading it to initialize scratch, but if that's unused
   // reading garbage should be OK.
-  ProgInfo.ScratchEnable =
-      ProgInfo.ScratchBlocks > 0 || ProgInfo.DynamicCallStack;
+  ProgInfo.ScratchEnable = MCBinaryExpr::createLOr(
+      MCBinaryExpr::createGT(ProgInfo.ScratchBlocks,
+                             MCConstantExpr::create(0, Ctx), Ctx),
+      ProgInfo.DynamicCallStack, Ctx);
+
   ProgInfo.UserSGPR = MFI->getNumUserSGPRs();
   // For AMDHSA, TRAP_HANDLER must be zero, as it is populated by the CP.
   ProgInfo.TrapHandlerEnable =
@@ -947,26 +1016,41 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
   ProgInfo.EXCPEnable = 0;
 
   if (STM.hasGFX90AInsts()) {
-    AMDHSA_BITS_SET(ProgInfo.ComputePGMRSrc3GFX90A,
-                    amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
-                    ProgInfo.AccumOffset);
-    AMDHSA_BITS_SET(ProgInfo.ComputePGMRSrc3GFX90A,
-                    amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
-                    ProgInfo.TgSplit);
+    // return ((Dst & ~Mask) | (Value << Shift))
+    auto SetBits = [&Ctx](const MCExpr *Dst, const MCExpr *Value, uint32_t Mask,
+         ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/88257


More information about the llvm-commits mailing list