[llvm] d86b68a - MCExpr-ify SIProgramInfo (#88257)

Thu May 9 05:02:36 PDT 2024

Author: Janek van Oirschot
Date: 2024-05-09T13:02:32+01:00
New Revision: d86b68afd7f0d7684adc312bcdc87f9027d0d896

URL: https://github.com/llvm/llvm-project/commit/d86b68afd7f0d7684adc312bcdc87f9027d0d896
DIFF: https://github.com/llvm/llvm-project/commit/d86b68afd7f0d7684adc312bcdc87f9027d0d896.diff

LOG: MCExpr-ify SIProgramInfo (#88257)

Convert members in SIProgramInfo affected by variables provided by AMDGPUResourceUsageAnalysis into MCExprs.

Added: 
    llvm/test/MC/AMDGPU/alignto_mcexpr.s
    llvm/test/MC/AMDGPU/extrasgprs_mcexpr.s
    llvm/test/MC/AMDGPU/occupancy_mcexpr.s
    llvm/test/MC/AMDGPU/totalnumvgpr_mcexpr.s
    llvm/unittests/MC/AMDGPU/SIProgramInfoMCExprs.cpp

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
    llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
    llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
    llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
    llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
    llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
    llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h
    llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
    llvm/lib/Target/AMDGPU/SIProgramInfo.h
    llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
    llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
    llvm/unittests/MC/AMDGPU/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 89a5ceac629b1..de81904143b7b 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -22,6 +22,7 @@
 #include "AMDKernelCodeT.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUInstPrinter.h"
+#include "MCTargetDesc/AMDGPUMCExpr.h"
 #include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
 #include "R600AsmPrinter.h"
@@ -134,6 +135,15 @@ void AMDGPUAsmPrinter::initTargetStreamer(Module &M) {
     getTargetStreamer()->getPALMetadata()->readFromIR(M);
 }
 
+uint64_t AMDGPUAsmPrinter::getMCExprValue(const MCExpr *Value, MCContext &Ctx) {
+  int64_t Val;
+  if (!Value->evaluateAsAbsolute(Val)) {
+    Ctx.reportError(SMLoc(), "could not resolve expression when required.");
+    return 0;
+  }
+  return static_cast<uint64_t>(Val);
+}
+
 void AMDGPUAsmPrinter::emitEndOfAsmFile(Module &M) {
   // Init target streamer if it has not yet happened
   if (!IsTargetStreamerInitialized)
@@ -237,12 +247,14 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
   getNameWithPrefix(KernelName, &MF->getFunction());
   getTargetStreamer()->EmitAmdhsaKernelDescriptor(
       STM, KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
-      CurrentProgramInfo.NumVGPRsForWavesPerEU,
-      CurrentProgramInfo.NumSGPRsForWavesPerEU -
+      getMCExprValue(CurrentProgramInfo.NumVGPRsForWavesPerEU, Context),
+      getMCExprValue(CurrentProgramInfo.NumSGPRsForWavesPerEU, Context) -
           IsaInfo::getNumExtraSGPRs(
-              &STM, CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
+              &STM, getMCExprValue(CurrentProgramInfo.VCCUsed, Context),
+              getMCExprValue(CurrentProgramInfo.FlatUsed, Context),
               getTargetStreamer()->getTargetID()->isXnackOnOrAny()),
-      CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed);
+      getMCExprValue(CurrentProgramInfo.VCCUsed, Context),
+      getMCExprValue(CurrentProgramInfo.FlatUsed, Context));
 
   Streamer.popSection();
 }
@@ -422,7 +434,7 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
         amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
   }
 
-  if (CurrentProgramInfo.DynamicCallStack &&
+  if (getMCExprValue(CurrentProgramInfo.DynamicCallStack, MF.getContext()) &&
       CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
     KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK;
 
@@ -439,29 +451,22 @@ AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(const MachineFunction &MF,
 
   MCKernelDescriptor KernelDescriptor;
 
-  assert(isUInt<32>(PI.ScratchSize));
-  assert(isUInt<32>(PI.getComputePGMRSrc1(STM)));
-  assert(isUInt<32>(PI.getComputePGMRSrc2()));
-
   KernelDescriptor.group_segment_fixed_size =
       MCConstantExpr::create(PI.LDSSize, Ctx);
-  KernelDescriptor.private_segment_fixed_size =
-      MCConstantExpr::create(PI.ScratchSize, Ctx);
+  KernelDescriptor.private_segment_fixed_size = PI.ScratchSize;
 
   Align MaxKernArgAlign;
   KernelDescriptor.kernarg_size = MCConstantExpr::create(
       STM.getKernArgSegmentSize(F, MaxKernArgAlign), Ctx);
 
-  KernelDescriptor.compute_pgm_rsrc1 =
-      MCConstantExpr::create(PI.getComputePGMRSrc1(STM), Ctx);
-  KernelDescriptor.compute_pgm_rsrc2 =
-      MCConstantExpr::create(PI.getComputePGMRSrc2(), Ctx);
+  KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1(STM, Ctx);
+  KernelDescriptor.compute_pgm_rsrc2 = PI.getComputePGMRSrc2(Ctx);
   KernelDescriptor.kernel_code_properties =
       MCConstantExpr::create(getAmdhsaKernelCodeProperties(MF), Ctx);
 
-  assert(STM.hasGFX90AInsts() || CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0);
-  KernelDescriptor.compute_pgm_rsrc3 = MCConstantExpr::create(
-      STM.hasGFX90AInsts() ? CurrentProgramInfo.ComputePGMRSrc3GFX90A : 0, Ctx);
+  assert(STM.hasGFX90AInsts() ||
+         getMCExprValue(CurrentProgramInfo.ComputePGMRSrc3GFX90A, Ctx) == 0);
+  KernelDescriptor.compute_pgm_rsrc3 = CurrentProgramInfo.ComputePGMRSrc3GFX90A;
 
   KernelDescriptor.kernarg_preload = MCConstantExpr::create(
       AMDGPU::hasKernargPreload(STM) ? Info->getNumKernargPreloadedSGPRs() : 0,
@@ -477,9 +482,10 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
     initTargetStreamer(*MF.getFunction().getParent());
 
   ResourceUsage = &getAnalysis<AMDGPUResourceUsageAnalysis>();
-  CurrentProgramInfo = SIProgramInfo();
+  CurrentProgramInfo.reset(MF);
 
   const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
+  MCContext &Ctx = MF.getContext();
 
   // The starting address of all shader programs must be 256 bytes aligned.
   // Regular functions just need the basic required instruction alignment.
@@ -550,11 +556,13 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
 
     OutStreamer->emitRawComment(" Kernel info:", false);
     emitCommonFunctionComments(
-        CurrentProgramInfo.NumArchVGPR,
-        STM.hasMAIInsts() ? CurrentProgramInfo.NumAccVGPR
+        getMCExprValue(CurrentProgramInfo.NumArchVGPR, Ctx),
+        STM.hasMAIInsts() ? getMCExprValue(CurrentProgramInfo.NumAccVGPR, Ctx)
                           : std::optional<uint32_t>(),
-        CurrentProgramInfo.NumVGPR, CurrentProgramInfo.NumSGPR,
-        CurrentProgramInfo.ScratchSize, getFunctionCodeSize(MF), MFI);
+        getMCExprValue(CurrentProgramInfo.NumVGPR, Ctx),
+        getMCExprValue(CurrentProgramInfo.NumSGPR, Ctx),
+        getMCExprValue(CurrentProgramInfo.ScratchSize, Ctx),
+        getFunctionCodeSize(MF), MFI);
 
     OutStreamer->emitRawComment(
       " FloatMode: " + Twine(CurrentProgramInfo.FloatMode), false);
@@ -565,32 +573,44 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
       " bytes/workgroup (compile time only)", false);
 
     OutStreamer->emitRawComment(
-      " SGPRBlocks: " + Twine(CurrentProgramInfo.SGPRBlocks), false);
+        " SGPRBlocks: " +
+            Twine(getMCExprValue(CurrentProgramInfo.SGPRBlocks, Ctx)),
+        false);
     OutStreamer->emitRawComment(
-      " VGPRBlocks: " + Twine(CurrentProgramInfo.VGPRBlocks), false);
+        " VGPRBlocks: " +
+            Twine(getMCExprValue(CurrentProgramInfo.VGPRBlocks, Ctx)),
+        false);
 
     OutStreamer->emitRawComment(
-      " NumSGPRsForWavesPerEU: " +
-      Twine(CurrentProgramInfo.NumSGPRsForWavesPerEU), false);
+        " NumSGPRsForWavesPerEU: " +
+            Twine(
+                getMCExprValue(CurrentProgramInfo.NumSGPRsForWavesPerEU, Ctx)),
+        false);
     OutStreamer->emitRawComment(
-      " NumVGPRsForWavesPerEU: " +
-      Twine(CurrentProgramInfo.NumVGPRsForWavesPerEU), false);
+        " NumVGPRsForWavesPerEU: " +
+            Twine(
+                getMCExprValue(CurrentProgramInfo.NumVGPRsForWavesPerEU, Ctx)),
+        false);
 
     if (STM.hasGFX90AInsts())
       OutStreamer->emitRawComment(
-        " AccumOffset: " +
-        Twine((CurrentProgramInfo.AccumOffset + 1) * 4), false);
+          " AccumOffset: " +
+              Twine((getMCExprValue(CurrentProgramInfo.AccumOffset, Ctx) + 1) *
+                    4),
+          false);
 
     OutStreamer->emitRawComment(
-      " Occupancy: " +
-      Twine(CurrentProgramInfo.Occupancy), false);
+        " Occupancy: " +
+            Twine(getMCExprValue(CurrentProgramInfo.Occupancy, Ctx)),
+        false);
 
     OutStreamer->emitRawComment(
       " WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);
 
-    OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:SCRATCH_EN: " +
-                                    Twine(CurrentProgramInfo.ScratchEnable),
-                                false);
+    OutStreamer->emitRawComment(
+        " COMPUTE_PGM_RSRC2:SCRATCH_EN: " +
+            Twine(getMCExprValue(CurrentProgramInfo.ScratchEnable, Ctx)),
+        false);
     OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +
                                     Twine(CurrentProgramInfo.UserSGPR),
                                 false);
@@ -611,18 +631,20 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
                                 false);
 
     assert(STM.hasGFX90AInsts() ||
-           CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0);
+           getMCExprValue(CurrentProgramInfo.ComputePGMRSrc3GFX90A, Ctx) == 0);
     if (STM.hasGFX90AInsts()) {
       OutStreamer->emitRawComment(
-        " COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " +
-        Twine((AMDHSA_BITS_GET(CurrentProgramInfo.ComputePGMRSrc3GFX90A,
-                               amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET))),
-                               false);
+          " COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " +
+              Twine((AMDHSA_BITS_GET(
+                  getMCExprValue(CurrentProgramInfo.ComputePGMRSrc3GFX90A, Ctx),
+                  amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET))),
+          false);
       OutStreamer->emitRawComment(
-        " COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " +
-        Twine((AMDHSA_BITS_GET(CurrentProgramInfo.ComputePGMRSrc3GFX90A,
-                               amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT))),
-                               false);
+          " COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " +
+              Twine((AMDHSA_BITS_GET(
+                  getMCExprValue(CurrentProgramInfo.ComputePGMRSrc3GFX90A, Ctx),
+                  amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT))),
+          false);
     }
   }
 
@@ -702,23 +724,40 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
   const AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo &Info =
       ResourceUsage->getResourceInfo(&MF.getFunction());
   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
+  MCContext &Ctx = MF.getContext();
+
+  auto CreateExpr = [&Ctx](int64_t Value) {
+    return MCConstantExpr::create(Value, Ctx);
+  };
 
-  ProgInfo.NumArchVGPR = Info.NumVGPR;
-  ProgInfo.NumAccVGPR = Info.NumAGPR;
-  ProgInfo.NumVGPR = Info.getTotalNumVGPRs(STM);
-  ProgInfo.AccumOffset = alignTo(std::max(1, Info.NumVGPR), 4) / 4 - 1;
+  auto TryGetMCExprValue = [&Ctx](const MCExpr *Value, uint64_t &Res) -> bool {
+    int64_t Val;
+    if (Value->evaluateAsAbsolute(Val)) {
+      Res = Val;
+      return true;
+    }
+    return false;
+  };
+
+  ProgInfo.NumArchVGPR = CreateExpr(Info.NumVGPR);
+  ProgInfo.NumAccVGPR = CreateExpr(Info.NumAGPR);
+  ProgInfo.NumVGPR = CreateExpr(Info.getTotalNumVGPRs(STM));
+  ProgInfo.AccumOffset =
+      CreateExpr(alignTo(std::max(1, Info.NumVGPR), 4) / 4 - 1);
   ProgInfo.TgSplit = STM.isTgSplitEnabled();
-  ProgInfo.NumSGPR = Info.NumExplicitSGPR;
-  ProgInfo.ScratchSize = Info.PrivateSegmentSize;
-  ProgInfo.VCCUsed = Info.UsesVCC;
-  ProgInfo.FlatUsed = Info.UsesFlatScratch;
-  ProgInfo.DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion;
+  ProgInfo.NumSGPR = CreateExpr(Info.NumExplicitSGPR);
+  ProgInfo.ScratchSize = CreateExpr(Info.PrivateSegmentSize);
+  ProgInfo.VCCUsed = CreateExpr(Info.UsesVCC);
+  ProgInfo.FlatUsed = CreateExpr(Info.UsesFlatScratch);
+  ProgInfo.DynamicCallStack =
+      CreateExpr(Info.HasDynamicallySizedStack || Info.HasRecursion);
 
   const uint64_t MaxScratchPerWorkitem =
       STM.getMaxWaveScratchSize() / STM.getWavefrontSize();
-  if (ProgInfo.ScratchSize > MaxScratchPerWorkitem) {
-    DiagnosticInfoStackSize DiagStackSize(MF.getFunction(),
-                                          ProgInfo.ScratchSize,
+  uint64_t ScratchSize;
+  if (TryGetMCExprValue(ProgInfo.ScratchSize, ScratchSize) &&
+      ScratchSize > MaxScratchPerWorkitem) {
+    DiagnosticInfoStackSize DiagStackSize(MF.getFunction(), ScratchSize,
                                           MaxScratchPerWorkitem, DS_Error);
     MF.getFunction().getContext().diagnose(DiagStackSize);
   }
@@ -728,27 +767,29 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
   // The calculations related to SGPR/VGPR blocks are
   // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
   // unified.
-  unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
-      &STM, ProgInfo.VCCUsed, ProgInfo.FlatUsed,
-      getTargetStreamer()->getTargetID()->isXnackOnOrAny());
+  const MCExpr *ExtraSGPRs = AMDGPUVariadicMCExpr::createExtraSGPRs(
+      ProgInfo.VCCUsed, ProgInfo.FlatUsed,
+      getTargetStreamer()->getTargetID()->isXnackOnOrAny(), Ctx);
 
   // Check the addressable register limit before we add ExtraSGPRs.
   if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
       !STM.hasSGPRInitBug()) {
     unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
-    if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
+    uint64_t NumSgpr;
+    if (TryGetMCExprValue(ProgInfo.NumSGPR, NumSgpr) &&
+        NumSgpr > MaxAddressableNumSGPRs) {
       // This can happen due to a compiler bug or when using inline asm.
       LLVMContext &Ctx = MF.getFunction().getContext();
       DiagnosticInfoResourceLimit Diag(
-          MF.getFunction(), "addressable scalar registers", ProgInfo.NumSGPR,
+          MF.getFunction(), "addressable scalar registers", NumSgpr,
           MaxAddressableNumSGPRs, DS_Error, DK_ResourceLimit);
       Ctx.diagnose(Diag);
-      ProgInfo.NumSGPR = MaxAddressableNumSGPRs - 1;
+      ProgInfo.NumSGPR = CreateExpr(MaxAddressableNumSGPRs - 1);
     }
   }
 
   // Account for extra SGPRs and VGPRs reserved for debugger use.
-  ProgInfo.NumSGPR += ExtraSGPRs;
+  ProgInfo.NumSGPR = MCBinaryExpr::createAdd(ProgInfo.NumSGPR, ExtraSGPRs, Ctx);
 
   const Function &F = MF.getFunction();
 
@@ -819,40 +860,51 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
         }
       }
     }
-    ProgInfo.NumSGPR = std::max(ProgInfo.NumSGPR, WaveDispatchNumSGPR);
-    ProgInfo.NumArchVGPR = std::max(ProgInfo.NumVGPR, WaveDispatchNumVGPR);
-    ProgInfo.NumVGPR =
-        Info.getTotalNumVGPRs(STM, Info.NumAGPR, ProgInfo.NumArchVGPR);
+    ProgInfo.NumSGPR = AMDGPUVariadicMCExpr::createMax(
+        {ProgInfo.NumSGPR, CreateExpr(WaveDispatchNumSGPR)}, Ctx);
+
+    ProgInfo.NumArchVGPR = AMDGPUVariadicMCExpr::createMax(
+        {ProgInfo.NumVGPR, CreateExpr(WaveDispatchNumVGPR)}, Ctx);
+
+    ProgInfo.NumVGPR = AMDGPUVariadicMCExpr::createTotalNumVGPR(
+        ProgInfo.NumAccVGPR, ProgInfo.NumArchVGPR, Ctx);
   }
 
   // Adjust number of registers used to meet default/requested minimum/maximum
   // number of waves per execution unit request.
-  ProgInfo.NumSGPRsForWavesPerEU = std::max(
-    std::max(ProgInfo.NumSGPR, 1u), STM.getMinNumSGPRs(MFI->getMaxWavesPerEU()));
-  ProgInfo.NumVGPRsForWavesPerEU = std::max(
-    std::max(ProgInfo.NumVGPR, 1u), STM.getMinNumVGPRs(MFI->getMaxWavesPerEU()));
+  unsigned MaxWaves = MFI->getMaxWavesPerEU();
+  ProgInfo.NumSGPRsForWavesPerEU = AMDGPUVariadicMCExpr::createMax(
+      {ProgInfo.NumSGPR, CreateExpr(1ul),
+       CreateExpr(STM.getMinNumSGPRs(MaxWaves))},
+      Ctx);
+  ProgInfo.NumVGPRsForWavesPerEU = AMDGPUVariadicMCExpr::createMax(
+      {ProgInfo.NumVGPR, CreateExpr(1ul),
+       CreateExpr(STM.getMinNumVGPRs(MaxWaves))},
+      Ctx);
 
   if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ||
       STM.hasSGPRInitBug()) {
     unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
-    if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
+    uint64_t NumSgpr;
+    if (TryGetMCExprValue(ProgInfo.NumSGPR, NumSgpr) &&
+        NumSgpr > MaxAddressableNumSGPRs) {
       // This can happen due to a compiler bug or when using inline asm to use
       // the registers which are usually reserved for vcc etc.
       LLVMContext &Ctx = MF.getFunction().getContext();
       DiagnosticInfoResourceLimit Diag(MF.getFunction(), "scalar registers",
-                                       ProgInfo.NumSGPR, MaxAddressableNumSGPRs,
+                                       NumSgpr, MaxAddressableNumSGPRs,
                                        DS_Error, DK_ResourceLimit);
       Ctx.diagnose(Diag);
-      ProgInfo.NumSGPR = MaxAddressableNumSGPRs;
-      ProgInfo.NumSGPRsForWavesPerEU = MaxAddressableNumSGPRs;
+      ProgInfo.NumSGPR = CreateExpr(MaxAddressableNumSGPRs);
+      ProgInfo.NumSGPRsForWavesPerEU = CreateExpr(MaxAddressableNumSGPRs);
     }
   }
 
   if (STM.hasSGPRInitBug()) {
     ProgInfo.NumSGPR =
-        AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
+        CreateExpr(AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG);
     ProgInfo.NumSGPRsForWavesPerEU =
-        AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
+        CreateExpr(AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG);
   }
 
   if (MFI->getNumUserSGPRs() > STM.getMaxNumUserSGPRs()) {
@@ -871,11 +923,26 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
         STM.getAddressableLocalMemorySize(), DS_Error);
     Ctx.diagnose(Diag);
   }
+  // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
+  // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
+  auto GetNumGPRBlocks = [&CreateExpr, &Ctx](const MCExpr *NumGPR,
+                                             unsigned Granule) {
+    const MCExpr *OneConst = CreateExpr(1ul);
+    const MCExpr *GranuleConst = CreateExpr(Granule);
+    const MCExpr *MaxNumGPR =
+        AMDGPUVariadicMCExpr::createMax({NumGPR, OneConst}, Ctx);
+    const MCExpr *AlignToGPR =
+        AMDGPUVariadicMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
+    const MCExpr *DivGPR =
+        MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
+    const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
+    return SubGPR;
+  };
 
-  ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks(
-      &STM, ProgInfo.NumSGPRsForWavesPerEU);
-  ProgInfo.VGPRBlocks =
-      IsaInfo::getEncodedNumVGPRBlocks(&STM, ProgInfo.NumVGPRsForWavesPerEU);
+  ProgInfo.SGPRBlocks = GetNumGPRBlocks(ProgInfo.NumSGPRsForWavesPerEU,
+                                        IsaInfo::getSGPREncodingGranule(&STM));
+  ProgInfo.VGPRBlocks = GetNumGPRBlocks(ProgInfo.NumVGPRsForWavesPerEU,
+                                        IsaInfo::getVGPREncodingGranule(&STM));
 
   const SIModeRegisterDefaults Mode = MFI->getMode();
 
@@ -904,14 +971,23 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
   ProgInfo.LDSBlocks =
       alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
 
+  // The MCExpr equivalent of divideCeil.
+  auto DivideCeil = [&Ctx](const MCExpr *Numerator, const MCExpr *Denominator) {
+    const MCExpr *Ceil =
+        AMDGPUVariadicMCExpr::createAlignTo(Numerator, Denominator, Ctx);
+    return MCBinaryExpr::createDiv(Ceil, Denominator, Ctx);
+  };
+
   // Scratch is allocated in 64-dword or 256-dword blocks.
   unsigned ScratchAlignShift =
       STM.getGeneration() >= AMDGPUSubtarget::GFX11 ? 8 : 10;
   // We need to program the hardware with the amount of scratch memory that
   // is used by the entire wave.  ProgInfo.ScratchSize is the amount of
   // scratch memory used per thread.
-  ProgInfo.ScratchBlocks = divideCeil(
-      ProgInfo.ScratchSize * STM.getWavefrontSize(), 1ULL << ScratchAlignShift);
+  ProgInfo.ScratchBlocks = DivideCeil(
+      MCBinaryExpr::createMul(ProgInfo.ScratchSize,
+                              CreateExpr(STM.getWavefrontSize()), Ctx),
+      CreateExpr(1ULL << ScratchAlignShift));
 
   if (getIsaVersion(getGlobalSTI()->getCPU()).Major >= 10) {
     ProgInfo.WgpMode = STM.isCuModeEnabled() ? 0 : 1;
@@ -930,8 +1006,11 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
   // anything to disable it if we know the stack isn't used here. We may still
   // have emitted code reading it to initialize scratch, but if that's unused
   // reading garbage should be OK.
-  ProgInfo.ScratchEnable =
-      ProgInfo.ScratchBlocks > 0 || ProgInfo.DynamicCallStack;
+  ProgInfo.ScratchEnable = MCBinaryExpr::createLOr(
+      MCBinaryExpr::createGT(ProgInfo.ScratchBlocks,
+                             MCConstantExpr::create(0, Ctx), Ctx),
+      ProgInfo.DynamicCallStack, Ctx);
+
   ProgInfo.UserSGPR = MFI->getNumUserSGPRs();
   // For AMDHSA, TRAP_HANDLER must be zero, as it is populated by the CP.
   ProgInfo.TrapHandlerEnable =
@@ -947,26 +1026,41 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
   ProgInfo.EXCPEnable = 0;
 
   if (STM.hasGFX90AInsts()) {
-    AMDHSA_BITS_SET(ProgInfo.ComputePGMRSrc3GFX90A,
-                    amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
-                    ProgInfo.AccumOffset);
-    AMDHSA_BITS_SET(ProgInfo.ComputePGMRSrc3GFX90A,
-                    amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
-                    ProgInfo.TgSplit);
+    // return ((Dst & ~Mask) | (Value << Shift))
+    auto SetBits = [&Ctx](const MCExpr *Dst, const MCExpr *Value, uint32_t Mask,
+                          uint32_t Shift) {
+      auto Shft = MCConstantExpr::create(Shift, Ctx);
+      auto Msk = MCConstantExpr::create(Mask, Ctx);
+      Dst = MCBinaryExpr::createAnd(Dst, MCUnaryExpr::createNot(Msk, Ctx), Ctx);
+      Dst = MCBinaryExpr::createOr(
+          Dst, MCBinaryExpr::createShl(Value, Shft, Ctx), Ctx);
+      return Dst;
+    };
+
+    ProgInfo.ComputePGMRSrc3GFX90A =
+        SetBits(ProgInfo.ComputePGMRSrc3GFX90A, ProgInfo.AccumOffset,
+                amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
+                amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT);
+    ProgInfo.ComputePGMRSrc3GFX90A =
+        SetBits(ProgInfo.ComputePGMRSrc3GFX90A, CreateExpr(ProgInfo.TgSplit),
+                amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
+                amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT);
   }
 
-  ProgInfo.Occupancy = STM.computeOccupancy(MF.getFunction(), ProgInfo.LDSSize,
-                                            ProgInfo.NumSGPRsForWavesPerEU,
-                                            ProgInfo.NumVGPRsForWavesPerEU);
+  ProgInfo.Occupancy = AMDGPUVariadicMCExpr::createOccupancy(
+      STM.computeOccupancy(F, ProgInfo.LDSSize), ProgInfo.NumSGPRsForWavesPerEU,
+      ProgInfo.NumVGPRsForWavesPerEU, STM, Ctx);
+
   const auto [MinWEU, MaxWEU] =
       AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", {0, 0}, true);
-  if (ProgInfo.Occupancy < MinWEU) {
+  uint64_t Occupancy;
+  if (TryGetMCExprValue(ProgInfo.Occupancy, Occupancy) && Occupancy < MinWEU) {
     DiagnosticInfoOptimizationFailure Diag(
         F, F.getSubprogram(),
         "failed to meet occupancy target given by 'amdgpu-waves-per-eu' in "
         "'" +
             F.getName() + "': desired occupancy was " + Twine(MinWEU) +
-            ", final occupancy is " + Twine(ProgInfo.Occupancy));
+            ", final occupancy is " + Twine(Occupancy));
     F.getContext().diagnose(Diag);
   }
 }
@@ -989,36 +1083,78 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
   unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv());
+  MCContext &Ctx = MF.getContext();
+
+  // (((Value) & Mask) << Shift)
+  auto SetBits = [&Ctx](const MCExpr *Value, uint32_t Mask, uint32_t Shift) {
+    const MCExpr *msk = MCConstantExpr::create(Mask, Ctx);
+    const MCExpr *shft = MCConstantExpr::create(Shift, Ctx);
+    return MCBinaryExpr::createShl(MCBinaryExpr::createAnd(Value, msk, Ctx),
+                                   shft, Ctx);
+  };
+
+  auto EmitResolvedOrExpr = [this](const MCExpr *Value, unsigned Size) {
+    int64_t Val;
+    if (Value->evaluateAsAbsolute(Val))
+      OutStreamer->emitIntValue(static_cast<uint64_t>(Val), Size);
+    else
+      OutStreamer->emitValue(Value, Size);
+  };
 
   if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
     OutStreamer->emitInt32(R_00B848_COMPUTE_PGM_RSRC1);
 
-    OutStreamer->emitInt32(CurrentProgramInfo.getComputePGMRSrc1(STM));
+    EmitResolvedOrExpr(CurrentProgramInfo.getComputePGMRSrc1(STM, Ctx),
+                       /*Size=*/4);
 
     OutStreamer->emitInt32(R_00B84C_COMPUTE_PGM_RSRC2);
-    OutStreamer->emitInt32(CurrentProgramInfo.getComputePGMRSrc2());
+    EmitResolvedOrExpr(CurrentProgramInfo.getComputePGMRSrc2(Ctx), /*Size=*/4);
 
     OutStreamer->emitInt32(R_00B860_COMPUTE_TMPRING_SIZE);
-    OutStreamer->emitInt32(
-        STM.getGeneration() >= AMDGPUSubtarget::GFX12
-            ? S_00B860_WAVESIZE_GFX12Plus(CurrentProgramInfo.ScratchBlocks)
-        : STM.getGeneration() == AMDGPUSubtarget::GFX11
-            ? S_00B860_WAVESIZE_GFX11(CurrentProgramInfo.ScratchBlocks)
-            : S_00B860_WAVESIZE_PreGFX11(CurrentProgramInfo.ScratchBlocks));
+
+    // Sets bits according to S_0286E8_WAVESIZE_* mask and shift values for the
+    // appropriate generation.
+    if (STM.getGeneration() >= AMDGPUSubtarget::GFX12) {
+      EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
+                                 /*Mask=*/0x3FFFF, /*Shift=*/12),
+                         /*Size=*/4);
+    } else if (STM.getGeneration() == AMDGPUSubtarget::GFX11) {
+      EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
+                                 /*Mask=*/0x7FFF, /*Shift=*/12),
+                         /*Size=*/4);
+    } else {
+      EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
+                                 /*Mask=*/0x1FFF, /*Shift=*/12),
+                         /*Size=*/4);
+    }
 
     // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
     // 0" comment but I don't see a corresponding field in the register spec.
   } else {
     OutStreamer->emitInt32(RsrcReg);
-    OutStreamer->emitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
-                              S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4);
+
+    const MCExpr *GPRBlocks = MCBinaryExpr::createOr(
+        SetBits(CurrentProgramInfo.VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0),
+        SetBits(CurrentProgramInfo.SGPRBlocks, /*Mask=*/0x0F, /*Shift=*/6),
+        MF.getContext());
+    EmitResolvedOrExpr(GPRBlocks, /*Size=*/4);
     OutStreamer->emitInt32(R_0286E8_SPI_TMPRING_SIZE);
-    OutStreamer->emitInt32(
-        STM.getGeneration() >= AMDGPUSubtarget::GFX12
-            ? S_0286E8_WAVESIZE_GFX12Plus(CurrentProgramInfo.ScratchBlocks)
-        : STM.getGeneration() == AMDGPUSubtarget::GFX11
-            ? S_0286E8_WAVESIZE_GFX11(CurrentProgramInfo.ScratchBlocks)
-            : S_0286E8_WAVESIZE_PreGFX11(CurrentProgramInfo.ScratchBlocks));
+
+    // Sets bits according to S_0286E8_WAVESIZE_* mask and shift values for the
+    // appropriate generation.
+    if (STM.getGeneration() >= AMDGPUSubtarget::GFX12) {
+      EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
+                                 /*Mask=*/0x3FFFF, /*Shift=*/12),
+                         /*Size=*/4);
+    } else if (STM.getGeneration() == AMDGPUSubtarget::GFX11) {
+      EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
+                                 /*Mask=*/0x7FFF, /*Shift=*/12),
+                         /*Size=*/4);
+    } else {
+      EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
+                                 /*Mask=*/0x1FFF, /*Shift=*/12),
+                         /*Size=*/4);
+    }
   }
 
   if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
@@ -1070,33 +1206,38 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   auto CC = MF.getFunction().getCallingConv();
   auto MD = getTargetStreamer()->getPALMetadata();
+  auto &Ctx = MF.getContext();
 
   MD->setEntryPoint(CC, MF.getFunction().getName());
-  MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU);
+  MD->setNumUsedVgprs(
+      CC, getMCExprValue(CurrentProgramInfo.NumVGPRsForWavesPerEU, Ctx));
 
   // Only set AGPRs for supported devices
   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
   if (STM.hasMAIInsts()) {
-    MD->setNumUsedAgprs(CC, CurrentProgramInfo.NumAccVGPR);
+    MD->setNumUsedAgprs(CC, getMCExprValue(CurrentProgramInfo.NumAccVGPR, Ctx));
   }
 
-  MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU);
+  MD->setNumUsedSgprs(
+      CC, getMCExprValue(CurrentProgramInfo.NumSGPRsForWavesPerEU, Ctx));
   if (MD->getPALMajorVersion() < 3) {
     MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC, STM));
     if (AMDGPU::isCompute(CC)) {
       MD->setRsrc2(CC, CurrentProgramInfo.getComputePGMRSrc2());
     } else {
-      if (CurrentProgramInfo.ScratchBlocks > 0)
+      if (getMCExprValue(CurrentProgramInfo.ScratchBlocks, Ctx) > 0)
         MD->setRsrc2(CC, S_00B84C_SCRATCH_EN(1));
     }
   } else {
     MD->setHwStage(CC, ".debug_mode", (bool)CurrentProgramInfo.DebugMode);
-    MD->setHwStage(CC, ".scratch_en", (bool)CurrentProgramInfo.ScratchEnable);
+    MD->setHwStage(CC, ".scratch_en",
+                   (bool)getMCExprValue(CurrentProgramInfo.ScratchEnable, Ctx));
     EmitPALMetadataCommon(MD, CurrentProgramInfo, CC, STM);
   }
 
   // ScratchSize is in bytes, 16 aligned.
-  MD->setScratchSize(CC, alignTo(CurrentProgramInfo.ScratchSize, 16));
+  MD->setScratchSize(
+      CC, alignTo(getMCExprValue(CurrentProgramInfo.ScratchSize, Ctx), 16));
   if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
     unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11
                                 ? divideCeil(CurrentProgramInfo.LDSBlocks, 2)
@@ -1145,6 +1286,7 @@ void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
   StringRef FnName = MF.getFunction().getName();
   MD->setFunctionScratchSize(FnName, MFI.getStackSize());
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  MCContext &Ctx = MF.getContext();
 
   if (MD->getPALMajorVersion() < 3) {
     // Set compute registers
@@ -1158,8 +1300,10 @@ void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
 
   // Set optional info
   MD->setFunctionLdsSize(FnName, CurrentProgramInfo.LDSSize);
-  MD->setFunctionNumUsedVgprs(FnName, CurrentProgramInfo.NumVGPRsForWavesPerEU);
-  MD->setFunctionNumUsedSgprs(FnName, CurrentProgramInfo.NumSGPRsForWavesPerEU);
+  MD->setFunctionNumUsedVgprs(
+      FnName, getMCExprValue(CurrentProgramInfo.NumVGPRsForWavesPerEU, Ctx));
+  MD->setFunctionNumUsedSgprs(
+      FnName, getMCExprValue(CurrentProgramInfo.NumSGPRsForWavesPerEU, Ctx));
 }
 
 // This is supposed to be log2(Size)
@@ -1185,6 +1329,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
 
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
+  MCContext &Ctx = MF.getContext();
 
   AMDGPU::initDefaultAMDKernelCodeT(Out, &STM);
 
@@ -1193,7 +1338,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
       (CurrentProgramInfo.getComputePGMRSrc2() << 32);
   Out.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
 
-  if (CurrentProgramInfo.DynamicCallStack)
+  if (getMCExprValue(CurrentProgramInfo.DynamicCallStack, Ctx))
     Out.code_properties |= AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK;
 
   AMD_HSA_BITS_SET(Out.code_properties,
@@ -1229,9 +1374,10 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
 
   Align MaxKernArgAlign;
   Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
-  Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
-  Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
-  Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
+  Out.wavefront_sgpr_count = getMCExprValue(CurrentProgramInfo.NumSGPR, Ctx);
+  Out.workitem_vgpr_count = getMCExprValue(CurrentProgramInfo.NumVGPR, Ctx);
+  Out.workitem_private_segment_byte_size =
+      getMCExprValue(CurrentProgramInfo.ScratchSize, Ctx);
   Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
 
   // kernarg_segment_alignment is specified as log of the alignment.
@@ -1322,19 +1468,28 @@ void AMDGPUAsmPrinter::emitResourceUsageRemarks(
   // remarks to simulate newlines. If and when clang does accept newlines, this
   // formatting should be aggregated into one remark with newlines to avoid
   // printing multiple diagnostic location and diag opts.
+  MCContext &MCCtx = MF.getContext();
   EmitResourceUsageRemark("FunctionName", "Function Name",
                           MF.getFunction().getName());
-  EmitResourceUsageRemark("NumSGPR", "SGPRs", CurrentProgramInfo.NumSGPR);
-  EmitResourceUsageRemark("NumVGPR", "VGPRs", CurrentProgramInfo.NumArchVGPR);
-  if (hasMAIInsts)
-    EmitResourceUsageRemark("NumAGPR", "AGPRs", CurrentProgramInfo.NumAccVGPR);
-  EmitResourceUsageRemark("ScratchSize", "ScratchSize [bytes/lane]",
-                          CurrentProgramInfo.ScratchSize);
+  EmitResourceUsageRemark("NumSGPR", "SGPRs",
+                          getMCExprValue(CurrentProgramInfo.NumSGPR, MCCtx));
+  EmitResourceUsageRemark(
+      "NumVGPR", "VGPRs",
+      getMCExprValue(CurrentProgramInfo.NumArchVGPR, MCCtx));
+  if (hasMAIInsts) {
+    EmitResourceUsageRemark(
+        "NumAGPR", "AGPRs",
+        getMCExprValue(CurrentProgramInfo.NumAccVGPR, MCCtx));
+  }
+  EmitResourceUsageRemark(
+      "ScratchSize", "ScratchSize [bytes/lane]",
+      getMCExprValue(CurrentProgramInfo.ScratchSize, MCCtx));
   StringRef DynamicStackStr =
-      CurrentProgramInfo.DynamicCallStack ? "True" : "False";
+      getMCExprValue(CurrentProgramInfo.DynamicCallStack, MCCtx) ? "True"
+                                                                 : "False";
   EmitResourceUsageRemark("DynamicStack", "Dynamic Stack", DynamicStackStr);
   EmitResourceUsageRemark("Occupancy", "Occupancy [waves/SIMD]",
-                          CurrentProgramInfo.Occupancy);
+                          getMCExprValue(CurrentProgramInfo.Occupancy, MCCtx));
   EmitResourceUsageRemark("SGPRSpill", "SGPRs Spill",
                           CurrentProgramInfo.SGPRSpill);
   EmitResourceUsageRemark("VGPRSpill", "VGPRs Spill",

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index b8b2718d293e6..16d8952a533ef 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -78,6 +78,8 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
 
   void initTargetStreamer(Module &M);
 
+  static uint64_t getMCExprValue(const MCExpr *Value, MCContext &Ctx);
+
 public:
   explicit AMDGPUAsmPrinter(TargetMachine &TM,
                             std::unique_ptr<MCStreamer> Streamer);

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 9e288ab50e170..7ab9ba2851332 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -19,6 +19,8 @@
 #include "SIMachineFunctionInfo.h"
 #include "SIProgramInfo.h"
 #include "llvm/IR/Module.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
 using namespace llvm;
 
 static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
@@ -462,6 +464,16 @@ MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,
   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
   const Function &F = MF.getFunction();
 
+  auto GetMCExprValue = [&MF](const MCExpr *Value) {
+    int64_t Val;
+    if (!Value->evaluateAsAbsolute(Val)) {
+      MCContext &Ctx = MF.getContext();
+      Ctx.reportError(SMLoc(), "could not resolve expression when required.");
+      Val = 0;
+    }
+    return static_cast<uint64_t>(Val);
+  };
+
   auto Kern = HSAMetadataDoc->getMapNode();
 
   Align MaxKernArgAlign;
@@ -470,10 +482,11 @@ MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,
   Kern[".group_segment_fixed_size"] =
       Kern.getDocument()->getNode(ProgramInfo.LDSSize);
   Kern[".private_segment_fixed_size"] =
-      Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
-  if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
-    Kern[".uses_dynamic_stack"] =
-        Kern.getDocument()->getNode(ProgramInfo.DynamicCallStack);
+      Kern.getDocument()->getNode(GetMCExprValue(ProgramInfo.ScratchSize));
+  if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5) {
+    Kern[".uses_dynamic_stack"] = Kern.getDocument()->getNode(
+        static_cast<bool>(GetMCExprValue(ProgramInfo.DynamicCallStack)));
+  }
 
   if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5 && STM.supportsWGP())
     Kern[".workgroup_processor_mode"] =
@@ -484,12 +497,15 @@ MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,
       Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
   Kern[".wavefront_size"] =
       Kern.getDocument()->getNode(STM.getWavefrontSize());
-  Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
-  Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
+  Kern[".sgpr_count"] =
+      Kern.getDocument()->getNode(GetMCExprValue(ProgramInfo.NumSGPR));
+  Kern[".vgpr_count"] =
+      Kern.getDocument()->getNode(GetMCExprValue(ProgramInfo.NumVGPR));
 
   // Only add AGPR count to metadata for supported devices
   if (STM.hasMAIInsts()) {
-    Kern[".agpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumAccVGPR);
+    Kern[".agpr_count"] =
+        Kern.getDocument()->getNode(GetMCExprValue(ProgramInfo.NumAccVGPR));
   }
 
   Kern[".max_flat_workgroup_size"] =

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 9cfe81e5288ee..94ee4ac78142d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -674,29 +674,8 @@ bool GCNSubtarget::useVGPRIndexMode() const {
 bool GCNSubtarget::useAA() const { return UseAA; }
 
 unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
-  if (getGeneration() >= AMDGPUSubtarget::GFX10)
-    return getMaxWavesPerEU();
-
-  if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
-    if (SGPRs <= 80)
-      return 10;
-    if (SGPRs <= 88)
-      return 9;
-    if (SGPRs <= 100)
-      return 8;
-    return 7;
-  }
-  if (SGPRs <= 48)
-    return 10;
-  if (SGPRs <= 56)
-    return 9;
-  if (SGPRs <= 64)
-    return 8;
-  if (SGPRs <= 72)
-    return 7;
-  if (SGPRs <= 80)
-    return 6;
-  return 5;
+  return AMDGPU::IsaInfo::getOccupancyWithNumSGPRs(SGPRs, getMaxWavesPerEU(),
+                                                   getGeneration());
 }
 
 unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned NumVGPRs) const {

diff  --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 5ac245ac3b63d..d47a5f8ebb815 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -8399,12 +8399,16 @@ bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
     AGVK VK = StringSwitch<AGVK>(TokenId)
                   .Case("max", AGVK::AGVK_Max)
                   .Case("or", AGVK::AGVK_Or)
+                  .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
+                  .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
+                  .Case("alignto", AGVK::AGVK_AlignTo)
+                  .Case("occupancy", AGVK::AGVK_Occupancy)
                   .Default(AGVK::AGVK_None);
 
     if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
       SmallVector<const MCExpr *, 4> Exprs;
       uint64_t CommaCount = 0;
-      lex(); // Eat 'max'/'or'
+      lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
       lex(); // Eat '('
       while (true) {
         if (trySkipToken(AsmToken::RParen)) {

diff  --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
index 4578c33d92dce..159664faf983f 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
@@ -7,6 +7,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPUMCExpr.h"
+#include "GCNSubtarget.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/IR/Function.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
@@ -16,6 +19,7 @@
 #include <optional>
 
 using namespace llvm;
+using namespace llvm::AMDGPU;
 
 AMDGPUVariadicMCExpr::AMDGPUVariadicMCExpr(VariadicKind Kind,
                                            ArrayRef<const MCExpr *> Args,
@@ -61,6 +65,18 @@ void AMDGPUVariadicMCExpr::printImpl(raw_ostream &OS,
   case AGVK_Max:
     OS << "max(";
     break;
+  case AGVK_ExtraSGPRs:
+    OS << "extrasgprs(";
+    break;
+  case AGVK_TotalNumVGPRs:
+    OS << "totalnumvgprs(";
+    break;
+  case AGVK_AlignTo:
+    OS << "alignto(";
+    break;
+  case AGVK_Occupancy:
+    OS << "occupancy(";
+    break;
   }
   for (auto It = Args.begin(); It != Args.end(); ++It) {
     (*It)->print(OS, MAI, /*InParens=*/false);
@@ -82,10 +98,151 @@ static int64_t op(AMDGPUVariadicMCExpr::VariadicKind Kind, int64_t Arg1,
   }
 }
 
+bool AMDGPUVariadicMCExpr::evaluateExtraSGPRs(MCValue &Res,
+                                              const MCAsmLayout *Layout,
+                                              const MCFixup *Fixup) const {
+  auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
+    MCValue MCVal;
+    if (!Arg->evaluateAsRelocatable(MCVal, Layout, Fixup) ||
+        !MCVal.isAbsolute())
+      return false;
+
+    ConstantValue = MCVal.getConstant();
+    return true;
+  };
+
+  assert(Args.size() == 3 &&
+         "AMDGPUVariadic Argument count incorrect for ExtraSGPRs");
+  const MCSubtargetInfo *STI = Ctx.getSubtargetInfo();
+  uint64_t VCCUsed = 0, FlatScrUsed = 0, XNACKUsed = 0;
+
+  bool Success = TryGetMCExprValue(Args[2], XNACKUsed);
+
+  assert(Success && "Arguments 3 for ExtraSGPRs should be a known constant");
+  if (!Success || !TryGetMCExprValue(Args[0], VCCUsed) ||
+      !TryGetMCExprValue(Args[1], FlatScrUsed))
+    return false;
+
+  uint64_t ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
+      STI, (bool)VCCUsed, (bool)FlatScrUsed, (bool)XNACKUsed);
+  Res = MCValue::get(ExtraSGPRs);
+  return true;
+}
+
+bool AMDGPUVariadicMCExpr::evaluateTotalNumVGPR(MCValue &Res,
+                                                const MCAsmLayout *Layout,
+                                                const MCFixup *Fixup) const {
+  auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
+    MCValue MCVal;
+    if (!Arg->evaluateAsRelocatable(MCVal, Layout, Fixup) ||
+        !MCVal.isAbsolute())
+      return false;
+
+    ConstantValue = MCVal.getConstant();
+    return true;
+  };
+  assert(Args.size() == 2 &&
+         "AMDGPUVariadic Argument count incorrect for TotalNumVGPRs");
+  const MCSubtargetInfo *STI = Ctx.getSubtargetInfo();
+  uint64_t NumAGPR = 0, NumVGPR = 0;
+
+  bool Has90AInsts = AMDGPU::isGFX90A(*STI);
+
+  if (!TryGetMCExprValue(Args[0], NumAGPR) ||
+      !TryGetMCExprValue(Args[1], NumVGPR))
+    return false;
+
+  uint64_t TotalNum = Has90AInsts && NumAGPR ? alignTo(NumVGPR, 4) + NumAGPR
+                                             : std::max(NumVGPR, NumAGPR);
+  Res = MCValue::get(TotalNum);
+  return true;
+}
+
+bool AMDGPUVariadicMCExpr::evaluateAlignTo(MCValue &Res,
+                                           const MCAsmLayout *Layout,
+                                           const MCFixup *Fixup) const {
+  auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
+    MCValue MCVal;
+    if (!Arg->evaluateAsRelocatable(MCVal, Layout, Fixup) ||
+        !MCVal.isAbsolute())
+      return false;
+
+    ConstantValue = MCVal.getConstant();
+    return true;
+  };
+
+  assert(Args.size() == 2 &&
+         "AMDGPUVariadic Argument count incorrect for AlignTo");
+  uint64_t Value = 0, Align = 0;
+  if (!TryGetMCExprValue(Args[0], Value) || !TryGetMCExprValue(Args[1], Align))
+    return false;
+
+  Res = MCValue::get(alignTo(Value, Align));
+  return true;
+}
+
+bool AMDGPUVariadicMCExpr::evaluateOccupancy(MCValue &Res,
+                                             const MCAsmLayout *Layout,
+                                             const MCFixup *Fixup) const {
+  auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
+    MCValue MCVal;
+    if (!Arg->evaluateAsRelocatable(MCVal, Layout, Fixup) ||
+        !MCVal.isAbsolute())
+      return false;
+
+    ConstantValue = MCVal.getConstant();
+    return true;
+  };
+  assert(Args.size() == 7 &&
+         "AMDGPUVariadic Argument count incorrect for Occupancy");
+  uint64_t InitOccupancy, MaxWaves, Granule, TargetTotalNumVGPRs, Generation,
+      NumSGPRs, NumVGPRs;
+
+  bool Success = true;
+  Success &= TryGetMCExprValue(Args[0], MaxWaves);
+  Success &= TryGetMCExprValue(Args[1], Granule);
+  Success &= TryGetMCExprValue(Args[2], TargetTotalNumVGPRs);
+  Success &= TryGetMCExprValue(Args[3], Generation);
+  Success &= TryGetMCExprValue(Args[4], InitOccupancy);
+
+  assert(Success && "Arguments 1 to 5 for Occupancy should be known constants");
+
+  if (!Success || !TryGetMCExprValue(Args[5], NumSGPRs) ||
+      !TryGetMCExprValue(Args[6], NumVGPRs))
+    return false;
+
+  unsigned Occupancy = InitOccupancy;
+  if (NumSGPRs)
+    Occupancy = std::min(
+        Occupancy, IsaInfo::getOccupancyWithNumSGPRs(
+                       NumSGPRs, MaxWaves,
+                       static_cast<AMDGPUSubtarget::Generation>(Generation)));
+  if (NumVGPRs)
+    Occupancy = std::min(Occupancy,
+                         IsaInfo::getNumWavesPerEUWithNumVGPRs(
+                             NumVGPRs, Granule, MaxWaves, TargetTotalNumVGPRs));
+
+  Res = MCValue::get(Occupancy);
+  return true;
+}
+
 bool AMDGPUVariadicMCExpr::evaluateAsRelocatableImpl(
     MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const {
   std::optional<int64_t> Total;
 
+  switch (Kind) {
+  default:
+    break;
+  case AGVK_ExtraSGPRs:
+    return evaluateExtraSGPRs(Res, Layout, Fixup);
+  case AGVK_AlignTo:
+    return evaluateAlignTo(Res, Layout, Fixup);
+  case AGVK_TotalNumVGPRs:
+    return evaluateTotalNumVGPR(Res, Layout, Fixup);
+  case AGVK_Occupancy:
+    return evaluateOccupancy(Res, Layout, Fixup);
+  }
+
   for (const MCExpr *Arg : Args) {
     MCValue ArgRes;
     if (!Arg->evaluateAsRelocatable(ArgRes, Layout, Fixup) ||
@@ -113,3 +270,47 @@ MCFragment *AMDGPUVariadicMCExpr::findAssociatedFragment() const {
   }
   return nullptr;
 }
+
+/// Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed
+/// are unresolvable but needed for further MCExprs). Derived from
+/// implementation of IsaInfo::getNumExtraSGPRs in AMDGPUBaseInfo.cpp.
+///
+const AMDGPUVariadicMCExpr *
+AMDGPUVariadicMCExpr::createExtraSGPRs(const MCExpr *VCCUsed,
+                                       const MCExpr *FlatScrUsed,
+                                       bool XNACKUsed, MCContext &Ctx) {
+
+  return create(AGVK_ExtraSGPRs,
+                {VCCUsed, FlatScrUsed, MCConstantExpr::create(XNACKUsed, Ctx)},
+                Ctx);
+}
+
+const AMDGPUVariadicMCExpr *AMDGPUVariadicMCExpr::createTotalNumVGPR(
+    const MCExpr *NumAGPR, const MCExpr *NumVGPR, MCContext &Ctx) {
+  return create(AGVK_TotalNumVGPRs, {NumAGPR, NumVGPR}, Ctx);
+}
+
+/// Mimics GCNSubtarget::computeOccupancy for MCExpr.
+///
+/// Remove dependency on GCNSubtarget and depend only only the necessary values
+/// for said occupancy computation. Should match computeOccupancy implementation
+/// without passing \p STM on.
+const AMDGPUVariadicMCExpr *
+AMDGPUVariadicMCExpr::createOccupancy(unsigned InitOcc, const MCExpr *NumSGPRs,
+                                      const MCExpr *NumVGPRs,
+                                      const GCNSubtarget &STM, MCContext &Ctx) {
+  unsigned MaxWaves = IsaInfo::getMaxWavesPerEU(&STM);
+  unsigned Granule = IsaInfo::getVGPRAllocGranule(&STM);
+  unsigned TargetTotalNumVGPRs = IsaInfo::getTotalNumVGPRs(&STM);
+  unsigned Generation = STM.getGeneration();
+
+  auto CreateExpr = [&Ctx](unsigned Value) {
+    return MCConstantExpr::create(Value, Ctx);
+  };
+
+  return create(AGVK_Occupancy,
+                {CreateExpr(MaxWaves), CreateExpr(Granule),
+                 CreateExpr(TargetTotalNumVGPRs), CreateExpr(Generation),
+                 CreateExpr(InitOcc), NumSGPRs, NumVGPRs},
+                Ctx);
+}

diff  --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h
index 238e0dea791b2..f92350b592350 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h
@@ -14,6 +14,9 @@
 
 namespace llvm {
 
+class Function;
+class GCNSubtarget;
+
 /// AMDGPU target specific variadic MCExpr operations.
 ///
 /// Takes in a minimum of 1 argument to be used with an operation. The supported
@@ -26,7 +29,15 @@ namespace llvm {
 ///
 class AMDGPUVariadicMCExpr : public MCTargetExpr {
 public:
-  enum VariadicKind { AGVK_None, AGVK_Or, AGVK_Max };
+  enum VariadicKind {
+    AGVK_None,
+    AGVK_Or,
+    AGVK_Max,
+    AGVK_ExtraSGPRs,
+    AGVK_TotalNumVGPRs,
+    AGVK_AlignTo,
+    AGVK_Occupancy
+  };
 
 private:
   VariadicKind Kind;
@@ -38,6 +49,15 @@ class AMDGPUVariadicMCExpr : public MCTargetExpr {
                        MCContext &Ctx);
   ~AMDGPUVariadicMCExpr();
 
+  bool evaluateExtraSGPRs(MCValue &Res, const MCAsmLayout *Layout,
+                          const MCFixup *Fixup) const;
+  bool evaluateTotalNumVGPR(MCValue &Res, const MCAsmLayout *Layout,
+                            const MCFixup *Fixup) const;
+  bool evaluateAlignTo(MCValue &Res, const MCAsmLayout *Layout,
+                       const MCFixup *Fixup) const;
+  bool evaluateOccupancy(MCValue &Res, const MCAsmLayout *Layout,
+                         const MCFixup *Fixup) const;
+
 public:
   static const AMDGPUVariadicMCExpr *
   create(VariadicKind Kind, ArrayRef<const MCExpr *> Args, MCContext &Ctx);
@@ -52,6 +72,26 @@ class AMDGPUVariadicMCExpr : public MCTargetExpr {
     return create(VariadicKind::AGVK_Max, Args, Ctx);
   }
 
+  static const AMDGPUVariadicMCExpr *createExtraSGPRs(const MCExpr *VCCUsed,
+                                                      const MCExpr *FlatScrUsed,
+                                                      bool XNACKUsed,
+                                                      MCContext &Ctx);
+
+  static const AMDGPUVariadicMCExpr *createTotalNumVGPR(const MCExpr *NumAGPR,
+                                                        const MCExpr *NumVGPR,
+                                                        MCContext &Ctx);
+
+  static const AMDGPUVariadicMCExpr *
+  createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx) {
+    return create(VariadicKind::AGVK_AlignTo, {Value, Align}, Ctx);
+  }
+
+  static const AMDGPUVariadicMCExpr *createOccupancy(unsigned InitOcc,
+                                                     const MCExpr *NumSGPRs,
+                                                     const MCExpr *NumVGPRs,
+                                                     const GCNSubtarget &STM,
+                                                     MCContext &Ctx);
+
   VariadicKind getKind() const { return Kind; }
   const MCExpr *getSubExpr(size_t Index) const;
 

diff  --git a/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp b/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
index 9ed7aacc0538e..0d40816cdd4b8 100644
--- a/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
@@ -18,57 +18,114 @@
 #include "GCNSubtarget.h"
 #include "SIDefines.h"
 #include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/MC/MCExpr.h"
 
 using namespace llvm;
 
-uint64_t SIProgramInfo::getComputePGMRSrc1(const GCNSubtarget &ST) const {
-  uint64_t Reg = S_00B848_VGPRS(VGPRBlocks) | S_00B848_SGPRS(SGPRBlocks) |
-                 S_00B848_PRIORITY(Priority) | S_00B848_FLOAT_MODE(FloatMode) |
-                 S_00B848_PRIV(Priv) | S_00B848_DEBUG_MODE(DebugMode) |
-                 S_00B848_WGP_MODE(WgpMode) | S_00B848_MEM_ORDERED(MemOrdered);
+void SIProgramInfo::reset(const MachineFunction &MF) {
+  MCContext &Ctx = MF.getContext();
+
+  const MCExpr *ZeroExpr = MCConstantExpr::create(0, Ctx);
+
+  VGPRBlocks = ZeroExpr;
+  SGPRBlocks = ZeroExpr;
+  Priority = 0;
+  FloatMode = 0;
+  Priv = 0;
+  DX10Clamp = 0;
+  DebugMode = 0;
+  IEEEMode = 0;
+  WgpMode = 0;
+  MemOrdered = 0;
+  RrWgMode = 0;
+  ScratchSize = ZeroExpr;
+
+  LDSBlocks = 0;
+  ScratchBlocks = ZeroExpr;
+
+  ScratchEnable = ZeroExpr;
+  UserSGPR = 0;
+  TrapHandlerEnable = 0;
+  TGIdXEnable = 0;
+  TGIdYEnable = 0;
+  TGIdZEnable = 0;
+  TGSizeEnable = 0;
+  TIdIGCompCount = 0;
+  EXCPEnMSB = 0;
+  LdsSize = 0;
+  EXCPEnable = 0;
+
+  ComputePGMRSrc3GFX90A = ZeroExpr;
+
+  NumVGPR = ZeroExpr;
+  NumArchVGPR = ZeroExpr;
+  NumAccVGPR = ZeroExpr;
+  AccumOffset = ZeroExpr;
+  TgSplit = 0;
+  NumSGPR = ZeroExpr;
+  SGPRSpill = 0;
+  VGPRSpill = 0;
+  LDSSize = 0;
+  FlatUsed = ZeroExpr;
+
+  NumSGPRsForWavesPerEU = ZeroExpr;
+  NumVGPRsForWavesPerEU = ZeroExpr;
+  Occupancy = ZeroExpr;
+  DynamicCallStack = ZeroExpr;
+  VCCUsed = ZeroExpr;
+}
+
+static uint64_t getComputePGMRSrc1Reg(const SIProgramInfo &ProgInfo,
+                                      const GCNSubtarget &ST) {
+  uint64_t Reg = S_00B848_PRIORITY(ProgInfo.Priority) |
+                 S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
+                 S_00B848_PRIV(ProgInfo.Priv) |
+                 S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
+                 S_00B848_WGP_MODE(ProgInfo.WgpMode) |
+                 S_00B848_MEM_ORDERED(ProgInfo.MemOrdered);
 
   if (ST.hasDX10ClampMode())
-    Reg |= S_00B848_DX10_CLAMP(DX10Clamp);
+    Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp);
 
   if (ST.hasIEEEMode())
-    Reg |= S_00B848_IEEE_MODE(IEEEMode);
+    Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
 
   if (ST.hasRrWGMode())
-    Reg |= S_00B848_RR_WG_MODE(RrWgMode);
+    Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);
 
   return Reg;
 }
 
-uint64_t SIProgramInfo::getPGMRSrc1(CallingConv::ID CC,
-                                    const GCNSubtarget &ST) const {
-  if (AMDGPU::isCompute(CC)) {
-    return getComputePGMRSrc1(ST);
-  }
-  uint64_t Reg = S_00B848_VGPRS(VGPRBlocks) | S_00B848_SGPRS(SGPRBlocks) |
-                 S_00B848_PRIORITY(Priority) | S_00B848_FLOAT_MODE(FloatMode) |
-                 S_00B848_PRIV(Priv) | S_00B848_DEBUG_MODE(DebugMode);
+static uint64_t getPGMRSrc1Reg(const SIProgramInfo &ProgInfo,
+                               CallingConv::ID CC, const GCNSubtarget &ST) {
+  uint64_t Reg = S_00B848_PRIORITY(ProgInfo.Priority) |
+                 S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
+                 S_00B848_PRIV(ProgInfo.Priv) |
+                 S_00B848_DEBUG_MODE(ProgInfo.DebugMode);
 
   if (ST.hasDX10ClampMode())
-    Reg |= S_00B848_DX10_CLAMP(DX10Clamp);
+    Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp);
 
   if (ST.hasIEEEMode())
-    Reg |= S_00B848_IEEE_MODE(IEEEMode);
+    Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
 
   if (ST.hasRrWGMode())
-    Reg |= S_00B848_RR_WG_MODE(RrWgMode);
+    Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);
 
   switch (CC) {
   case CallingConv::AMDGPU_PS:
-    Reg |= S_00B028_MEM_ORDERED(MemOrdered);
+    Reg |= S_00B028_MEM_ORDERED(ProgInfo.MemOrdered);
     break;
   case CallingConv::AMDGPU_VS:
-    Reg |= S_00B128_MEM_ORDERED(MemOrdered);
+    Reg |= S_00B128_MEM_ORDERED(ProgInfo.MemOrdered);
     break;
   case CallingConv::AMDGPU_GS:
-    Reg |= S_00B228_WGP_MODE(WgpMode) | S_00B228_MEM_ORDERED(MemOrdered);
+    Reg |= S_00B228_WGP_MODE(ProgInfo.WgpMode) |
+           S_00B228_MEM_ORDERED(ProgInfo.MemOrdered);
     break;
   case CallingConv::AMDGPU_HS:
-    Reg |= S_00B428_WGP_MODE(WgpMode) | S_00B428_MEM_ORDERED(MemOrdered);
+    Reg |= S_00B428_WGP_MODE(ProgInfo.WgpMode) |
+           S_00B428_MEM_ORDERED(ProgInfo.MemOrdered);
     break;
   default:
     break;
@@ -76,22 +133,108 @@ uint64_t SIProgramInfo::getPGMRSrc1(CallingConv::ID CC,
   return Reg;
 }
 
-uint64_t SIProgramInfo::getComputePGMRSrc2() const {
-  uint64_t Reg =
-      S_00B84C_SCRATCH_EN(ScratchEnable) | S_00B84C_USER_SGPR(UserSGPR) |
-      S_00B84C_TRAP_HANDLER(TrapHandlerEnable) |
-      S_00B84C_TGID_X_EN(TGIdXEnable) | S_00B84C_TGID_Y_EN(TGIdYEnable) |
-      S_00B84C_TGID_Z_EN(TGIdZEnable) | S_00B84C_TG_SIZE_EN(TGSizeEnable) |
-      S_00B84C_TIDIG_COMP_CNT(TIdIGCompCount) |
-      S_00B84C_EXCP_EN_MSB(EXCPEnMSB) | S_00B84C_LDS_SIZE(LdsSize) |
-      S_00B84C_EXCP_EN(EXCPEnable);
+static uint64_t getComputePGMRSrc2Reg(const SIProgramInfo &ProgInfo) {
+  uint64_t Reg = S_00B84C_USER_SGPR(ProgInfo.UserSGPR) |
+                 S_00B84C_TRAP_HANDLER(ProgInfo.TrapHandlerEnable) |
+                 S_00B84C_TGID_X_EN(ProgInfo.TGIdXEnable) |
+                 S_00B84C_TGID_Y_EN(ProgInfo.TGIdYEnable) |
+                 S_00B84C_TGID_Z_EN(ProgInfo.TGIdZEnable) |
+                 S_00B84C_TG_SIZE_EN(ProgInfo.TGSizeEnable) |
+                 S_00B84C_TIDIG_COMP_CNT(ProgInfo.TIdIGCompCount) |
+                 S_00B84C_EXCP_EN_MSB(ProgInfo.EXCPEnMSB) |
+                 S_00B84C_LDS_SIZE(ProgInfo.LdsSize) |
+                 S_00B84C_EXCP_EN(ProgInfo.EXCPEnable);
+
+  return Reg;
+}
+
+static const MCExpr *MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift,
+                               MCContext &Ctx) {
+  if (Mask) {
+    const MCExpr *MaskExpr = MCConstantExpr::create(Mask, Ctx);
+    Val = MCBinaryExpr::createAnd(Val, MaskExpr, Ctx);
+  }
+  if (Shift) {
+    const MCExpr *ShiftExpr = MCConstantExpr::create(Shift, Ctx);
+    Val = MCBinaryExpr::createShl(Val, ShiftExpr, Ctx);
+  }
+  return Val;
+}
+
+uint64_t SIProgramInfo::getComputePGMRSrc1(const GCNSubtarget &ST) const {
+  int64_t VBlocks, SBlocks;
+  VGPRBlocks->evaluateAsAbsolute(VBlocks);
+  SGPRBlocks->evaluateAsAbsolute(SBlocks);
+
+  uint64_t Reg = S_00B848_VGPRS(static_cast<uint64_t>(VBlocks)) |
+                 S_00B848_SGPRS(static_cast<uint64_t>(SBlocks)) |
+                 getComputePGMRSrc1Reg(*this, ST);
 
   return Reg;
 }
 
+uint64_t SIProgramInfo::getPGMRSrc1(CallingConv::ID CC,
+                                    const GCNSubtarget &ST) const {
+  if (AMDGPU::isCompute(CC)) {
+    return getComputePGMRSrc1(ST);
+  }
+  int64_t VBlocks, SBlocks;
+  VGPRBlocks->evaluateAsAbsolute(VBlocks);
+  SGPRBlocks->evaluateAsAbsolute(SBlocks);
+
+  return getPGMRSrc1Reg(*this, CC, ST) |
+         S_00B848_VGPRS(static_cast<uint64_t>(VBlocks)) |
+         S_00B848_SGPRS(static_cast<uint64_t>(SBlocks));
+}
+
+uint64_t SIProgramInfo::getComputePGMRSrc2() const {
+  int64_t ScratchEn;
+  ScratchEnable->evaluateAsAbsolute(ScratchEn);
+  return ScratchEn | getComputePGMRSrc2Reg(*this);
+}
+
 uint64_t SIProgramInfo::getPGMRSrc2(CallingConv::ID CC) const {
   if (AMDGPU::isCompute(CC))
     return getComputePGMRSrc2();
 
   return 0;
 }
+
+const MCExpr *SIProgramInfo::getComputePGMRSrc1(const GCNSubtarget &ST,
+                                                MCContext &Ctx) const {
+  uint64_t Reg = getComputePGMRSrc1Reg(*this, ST);
+  const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
+  const MCExpr *Res = MCBinaryExpr::createOr(
+      MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
+      MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
+  return MCBinaryExpr::createOr(RegExpr, Res, Ctx);
+}
+
+const MCExpr *SIProgramInfo::getPGMRSrc1(CallingConv::ID CC,
+                                         const GCNSubtarget &ST,
+                                         MCContext &Ctx) const {
+  if (AMDGPU::isCompute(CC)) {
+    return getComputePGMRSrc1(ST, Ctx);
+  }
+
+  uint64_t Reg = getPGMRSrc1Reg(*this, CC, ST);
+  const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
+  const MCExpr *Res = MCBinaryExpr::createOr(
+      MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
+      MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
+  return MCBinaryExpr::createOr(RegExpr, Res, Ctx);
+}
+
+const MCExpr *SIProgramInfo::getComputePGMRSrc2(MCContext &Ctx) const {
+  uint64_t Reg = getComputePGMRSrc2Reg(*this);
+  const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
+  return MCBinaryExpr::createOr(ScratchEnable, RegExpr, Ctx);
+}
+
+const MCExpr *SIProgramInfo::getPGMRSrc2(CallingConv::ID CC,
+                                         MCContext &Ctx) const {
+  if (AMDGPU::isCompute(CC))
+    return getComputePGMRSrc2(Ctx);
+
+  return MCConstantExpr::create(0, Ctx);
+}

diff  --git a/llvm/lib/Target/AMDGPU/SIProgramInfo.h b/llvm/lib/Target/AMDGPU/SIProgramInfo.h
index 8c26789f936cf..c0a353033c3c5 100644
--- a/llvm/lib/Target/AMDGPU/SIProgramInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIProgramInfo.h
@@ -22,12 +22,15 @@
 namespace llvm {
 
 class GCNSubtarget;
+class MCContext;
+class MCExpr;
+class MachineFunction;
 
 /// Track resource usage for kernels / entry functions.
 struct SIProgramInfo {
     // Fields set in PGM_RSRC1 pm4 packet.
-    uint32_t VGPRBlocks = 0;
-    uint32_t SGPRBlocks = 0;
+    const MCExpr *VGPRBlocks = nullptr;
+    const MCExpr *SGPRBlocks = nullptr;
     uint32_t Priority = 0;
     uint32_t FloatMode = 0;
     uint32_t Priv = 0;
@@ -37,14 +40,14 @@ struct SIProgramInfo {
     uint32_t WgpMode = 0; // GFX10+
     uint32_t MemOrdered = 0; // GFX10+
     uint32_t RrWgMode = 0;   // GFX12+
-    uint64_t ScratchSize = 0;
+    const MCExpr *ScratchSize = nullptr;
 
     // State used to calculate fields set in PGM_RSRC2 pm4 packet.
     uint32_t LDSBlocks = 0;
-    uint32_t ScratchBlocks = 0;
+    const MCExpr *ScratchBlocks = nullptr;
 
     // Fields set in PGM_RSRC2 pm4 packet
-    uint32_t ScratchEnable = 0;
+    const MCExpr *ScratchEnable = nullptr;
     uint32_t UserSGPR = 0;
     uint32_t TrapHandlerEnable = 0;
     uint32_t TGIdXEnable = 0;
@@ -56,44 +59,56 @@ struct SIProgramInfo {
     uint32_t LdsSize = 0;
     uint32_t EXCPEnable = 0;
 
-    uint64_t ComputePGMRSrc3GFX90A = 0;
+    const MCExpr *ComputePGMRSrc3GFX90A = nullptr;
 
-    uint32_t NumVGPR = 0;
-    uint32_t NumArchVGPR = 0;
-    uint32_t NumAccVGPR = 0;
-    uint32_t AccumOffset = 0;
+    const MCExpr *NumVGPR = nullptr;
+    const MCExpr *NumArchVGPR = nullptr;
+    const MCExpr *NumAccVGPR = nullptr;
+    const MCExpr *AccumOffset = nullptr;
     uint32_t TgSplit = 0;
-    uint32_t NumSGPR = 0;
+    const MCExpr *NumSGPR = nullptr;
     unsigned SGPRSpill = 0;
     unsigned VGPRSpill = 0;
     uint32_t LDSSize = 0;
-    bool FlatUsed = false;
+    const MCExpr *FlatUsed = nullptr;
 
     // Number of SGPRs that meets number of waves per execution unit request.
-    uint32_t NumSGPRsForWavesPerEU = 0;
+    const MCExpr *NumSGPRsForWavesPerEU = nullptr;
 
     // Number of VGPRs that meets number of waves per execution unit request.
-    uint32_t NumVGPRsForWavesPerEU = 0;
+    const MCExpr *NumVGPRsForWavesPerEU = nullptr;
 
     // Final occupancy.
-    uint32_t Occupancy = 0;
+    const MCExpr *Occupancy = nullptr;
 
     // Whether there is recursion, dynamic allocas, indirect calls or some other
     // reason there may be statically unknown stack usage.
-    bool DynamicCallStack = false;
+    const MCExpr *DynamicCallStack = nullptr;
 
     // Bonus information for debugging.
-    bool VCCUsed = false;
+    const MCExpr *VCCUsed = nullptr;
 
     SIProgramInfo() = default;
 
+    // The constructor sets the values for each member as shown in the struct.
+    // However, setting the MCExpr members to their zero value equivalent
+    // happens in reset together with (duplicated) value re-set for the
+    // non-MCExpr members.
+    void reset(const MachineFunction &MF);
+
     /// Compute the value of the ComputePGMRsrc1 register.
     uint64_t getComputePGMRSrc1(const GCNSubtarget &ST) const;
     uint64_t getPGMRSrc1(CallingConv::ID CC, const GCNSubtarget &ST) const;
+    const MCExpr *getComputePGMRSrc1(const GCNSubtarget &ST,
+                                     MCContext &Ctx) const;
+    const MCExpr *getPGMRSrc1(CallingConv::ID CC, const GCNSubtarget &ST,
+                              MCContext &Ctx) const;
 
     /// Compute the value of the ComputePGMRsrc2 register.
     uint64_t getComputePGMRSrc2() const;
     uint64_t getPGMRSrc2(CallingConv::ID CC) const;
+    const MCExpr *getComputePGMRSrc2(MCContext &Ctx) const;
+    const MCExpr *getPGMRSrc2(CallingConv::ID CC, MCContext &Ctx) const;
 };
 
 } // namespace llvm

diff  --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 2fae7a31d70bf..2beaf903542bd 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1129,12 +1129,45 @@ unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
 
 unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
                                       unsigned NumVGPRs) {
-  unsigned MaxWaves = getMaxWavesPerEU(STI);
-  unsigned Granule = getVGPRAllocGranule(STI);
+  return getNumWavesPerEUWithNumVGPRs(NumVGPRs, getVGPRAllocGranule(STI),
+                                      getMaxWavesPerEU(STI),
+                                      getTotalNumVGPRs(STI));
+}
+
+unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
+                                      unsigned MaxWaves,
+                                      unsigned TotalNumVGPRs) {
   if (NumVGPRs < Granule)
     return MaxWaves;
   unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
-  return std::min(std::max(getTotalNumVGPRs(STI) / RoundedRegs, 1u), MaxWaves);
+  return std::min(std::max(TotalNumVGPRs / RoundedRegs, 1u), MaxWaves);
+}
+
+unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
+                                  AMDGPUSubtarget::Generation Gen) {
+  if (Gen >= AMDGPUSubtarget::GFX10)
+    return MaxWaves;
+
+  if (Gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+    if (SGPRs <= 80)
+      return 10;
+    if (SGPRs <= 88)
+      return 9;
+    if (SGPRs <= 100)
+      return 8;
+    return 7;
+  }
+  if (SGPRs <= 48)
+    return 10;
+  if (SGPRs <= 56)
+    return 9;
+  if (SGPRs <= 64)
+    return 8;
+  if (SGPRs <= 72)
+    return 7;
+  if (SGPRs <= 80)
+    return 6;
+  return 5;
 }
 
 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {

diff  --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 12d1b3a55cccb..fc4147df76e3e 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -9,6 +9,7 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
 
+#include "AMDGPUSubtarget.h"
 #include "SIDefines.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/InstrTypes.h"
@@ -311,6 +312,17 @@ unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
 unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
                                       unsigned NumVGPRs);
 
+/// \returns Number of waves reachable for a given \p NumVGPRs usage, \p Granule
+/// size, \p MaxWaves possible, and \p TotalNumVGPRs available.
+unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
+                                      unsigned MaxWaves,
+                                      unsigned TotalNumVGPRs);
+
+/// \returns Occupancy for a given \p SGPRs usage, \p MaxWaves possible, and \p
+/// Gen.
+unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
+                                  AMDGPUSubtarget::Generation Gen);
+
 /// \returns Number of VGPR blocks needed for given subtarget \p STI when
 /// \p NumVGPRs are used. We actually return the number of blocks -1, since
 /// that's what we encode.

diff  --git a/llvm/test/MC/AMDGPU/alignto_mcexpr.s b/llvm/test/MC/AMDGPU/alignto_mcexpr.s
new file mode 100644
index 0000000000000..e864f3736828c
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/alignto_mcexpr.s
@@ -0,0 +1,15 @@
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa < %s | FileCheck --check-prefix=ASM %s
+
+// ASM: .set alignto_zero_eight, 0
+// ASM: .set alignto_one_eight, 8
+// ASM: .set alignto_five_eight, 8
+// ASM: .set alignto_seven_eight, 8
+// ASM: .set alignto_eight_eight, 8
+// ASM: .set alignto_ten_eight, 16
+
+.set alignto_zero_eight, alignto(0, 8)
+.set alignto_one_eight, alignto(1, 8)
+.set alignto_five_eight, alignto(5, 8)
+.set alignto_seven_eight, alignto(7, 8)
+.set alignto_eight_eight, alignto(8, 8)
+.set alignto_ten_eight, alignto(10, 8)

diff  --git a/llvm/test/MC/AMDGPU/extrasgprs_mcexpr.s b/llvm/test/MC/AMDGPU/extrasgprs_mcexpr.s
new file mode 100644
index 0000000000000..e88b23bb34d4f
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/extrasgprs_mcexpr.s
@@ -0,0 +1,31 @@
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=bonaire < %s | FileCheck --check-prefix=GFX7 %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck --check-prefix=GFX90A %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck --check-prefix=GFX940 %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10 %s
+
+// gfx940 has architected flat scratch enabled.
+
+// GFX7: .set extrasgpr_none, 0
+// GFX7: .set extrasgpr_vcc, 2
+// GFX7: .set extrasgpr_flatscr, 4
+// GFX7: .set extrasgpr_xnack, 0
+
+// GFX90A: .set extrasgpr_none, 0
+// GFX90A: .set extrasgpr_vcc, 2
+// GFX90A: .set extrasgpr_flatscr, 6
+// GFX90A: .set extrasgpr_xnack, 4
+
+// GFX940: .set extrasgpr_none, 6
+// GFX940: .set extrasgpr_vcc, 6
+// GFX940: .set extrasgpr_flatscr, 6
+// GFX940: .set extrasgpr_xnack, 6
+
+// GFX10: .set extrasgpr_none, 0
+// GFX10: .set extrasgpr_vcc, 2
+// GFX10: .set extrasgpr_flatscr, 0
+// GFX10: .set extrasgpr_xnack, 0
+
+.set extrasgpr_none, extrasgprs(0, 0, 0)
+.set extrasgpr_vcc, extrasgprs(1, 0, 0)
+.set extrasgpr_flatscr, extrasgprs(0, 1, 0)
+.set extrasgpr_xnack, extrasgprs(0, 0, 1)

diff  --git a/llvm/test/MC/AMDGPU/occupancy_mcexpr.s b/llvm/test/MC/AMDGPU/occupancy_mcexpr.s
new file mode 100644
index 0000000000000..06bec8c538dae
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/occupancy_mcexpr.s
@@ -0,0 +1,61 @@
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa < %s | FileCheck --check-prefix=ASM %s
+
+// ASM: .set occupancy_init_one, 1
+// ASM: .set occupancy_init_seven, 7
+// ASM: .set occupancy_init_eight, 8
+
+.set occupancy_init_one, occupancy(0, 0, 0, 0, 1, 0, 0)
+.set occupancy_init_seven, occupancy(0, 0, 0, 0, 7, 0, 0)
+.set occupancy_init_eight, occupancy(0, 0, 0, 0, 8, 0, 0)
+
+// ASM: .set occupancy_numsgpr_seaisle_ten, 10
+// ASM: .set occupancy_numsgpr_seaisle_nine, 9
+// ASM: .set occupancy_numsgpr_seaisle_eight, 8
+// ASM: .set occupancy_numsgpr_seaisle_seven, 7
+// ASM: .set occupancy_numsgpr_seaisle_six, 6
+// ASM: .set occupancy_numsgpr_seaisle_five, 5
+
+.set occupancy_numsgpr_seaisle_ten, occupancy(0, 0, 0, 6, 11, 1, 0)
+.set occupancy_numsgpr_seaisle_nine, occupancy(0, 0, 0, 6, 11, 49, 0)
+.set occupancy_numsgpr_seaisle_eight, occupancy(0, 0, 0, 6, 11, 57, 0)
+.set occupancy_numsgpr_seaisle_seven, occupancy(0, 0, 0, 6, 11, 65, 0)
+.set occupancy_numsgpr_seaisle_six, occupancy(0, 0, 0, 6, 11, 73, 0)
+.set occupancy_numsgpr_seaisle_five, occupancy(0, 0, 0, 6, 11, 81, 0)
+
+// ASM: .set occupancy_numsgpr_gfx9_ten, 10
+// ASM: .set occupancy_numsgpr_gfx9_nine, 9
+// ASM: .set occupancy_numsgpr_gfx9_eight, 8
+// ASM: .set occupancy_numsgpr_gfx9_seven, 7
+
+.set occupancy_numsgpr_gfx9_ten, occupancy(0, 0, 0, 8, 11, 1, 0)
+.set occupancy_numsgpr_gfx9_nine, occupancy(0, 0, 0, 8, 11, 81, 0)
+.set occupancy_numsgpr_gfx9_eight, occupancy(0, 0, 0, 8, 11, 89, 0)
+.set occupancy_numsgpr_gfx9_seven, occupancy(0, 0, 0, 8, 11, 101, 0)
+
+// ASM: .set occupancy_numsgpr_gfx10_one, 1
+// ASM: .set occupancy_numsgpr_gfx10_seven, 7
+// ASM: .set occupancy_numsgpr_gfx10_eight, 8
+
+.set occupancy_numsgpr_gfx10_one, occupancy(1, 0, 0, 9, 11, 1, 0)
+.set occupancy_numsgpr_gfx10_seven, occupancy(7, 0, 0, 9, 11, 1, 0)
+.set occupancy_numsgpr_gfx10_eight, occupancy(8, 0, 0, 9, 11, 1, 0)
+
+// ASM: .set occupancy_numvgpr_high_granule_one, 1
+// ASM: .set occupancy_numvgpr_high_granule_seven, 7
+// ASM: .set occupancy_numvgpr_high_granule_eight, 8
+
+.set occupancy_numvgpr_high_granule_one, occupancy(1, 2, 0, 0, 11, 0, 1)
+.set occupancy_numvgpr_high_granule_seven, occupancy(7, 2, 0, 0, 11, 0, 1)
+.set occupancy_numvgpr_high_granule_eight, occupancy(8, 2, 0, 0, 11, 0, 1)
+
+// ASM: .set occupancy_numvgpr_low_total_one, 1
+// ASM: .set occupancy_numvgpr_one, 1
+// ASM: .set occupancy_numvgpr_seven, 7
+// ASM: .set occupancy_numvgpr_eight, 8
+// ASM: .set occupancy_numvgpr_ten, 10
+
+.set occupancy_numvgpr_low_total_one, occupancy(11, 4, 2, 0, 11, 0, 4)
+.set occupancy_numvgpr_one, occupancy(11, 4, 4, 0, 11, 0, 4)
+.set occupancy_numvgpr_seven, occupancy(11, 4, 28, 0, 11, 0, 4)
+.set occupancy_numvgpr_eight, occupancy(11, 4, 32, 0, 11, 0, 4)
+.set occupancy_numvgpr_ten, occupancy(11, 4, 40, 0, 11, 0, 4)

diff  --git a/llvm/test/MC/AMDGPU/totalnumvgpr_mcexpr.s b/llvm/test/MC/AMDGPU/totalnumvgpr_mcexpr.s
new file mode 100644
index 0000000000000..29bb885b20804
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/totalnumvgpr_mcexpr.s
@@ -0,0 +1,26 @@
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck --check-prefix=GFX90A %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10 %s
+
+// GFX10: .set totalvgpr_none, 0
+// GFX10: .set totalvgpr_one, 1
+// GFX10: .set totalvgpr_two, 2
+
+.set totalvgpr_none, totalnumvgprs(0, 0)
+.set totalvgpr_one, totalnumvgprs(1, 0)
+.set totalvgpr_two, totalnumvgprs(1, 2)
+
+// GFX90A: .set totalvgpr90a_none, 0
+// GFX90A: .set totalvgpr90a_one, 1
+// GFX90A: .set totalvgpr90a_two, 2
+
+.set totalvgpr90a_none, totalnumvgprs(0, 0)
+.set totalvgpr90a_one, totalnumvgprs(0, 1)
+.set totalvgpr90a_two, totalnumvgprs(0, 2)
+
+// GFX90A: .set totalvgpr90a_agpr_minimal, 1
+// GFX90A: .set totalvgpr90a_agpr_rounded_eight, 8
+// GFX90A: .set totalvgpr90a_agpr_exact_eight, 8
+
+.set totalvgpr90a_agpr_minimal, totalnumvgprs(1, 0)
+.set totalvgpr90a_agpr_rounded_eight, totalnumvgprs(4, 2)
+.set totalvgpr90a_agpr_exact_eight, totalnumvgprs(4, 4)

diff  --git a/llvm/unittests/MC/AMDGPU/CMakeLists.txt b/llvm/unittests/MC/AMDGPU/CMakeLists.txt
index 06ca89a72a7cd..be8ff572e6f7d 100644
--- a/llvm/unittests/MC/AMDGPU/CMakeLists.txt
+++ b/llvm/unittests/MC/AMDGPU/CMakeLists.txt
@@ -1,12 +1,20 @@
+include_directories(
+  ${PROJECT_SOURCE_DIR}/lib/Target/AMDGPU
+  ${PROJECT_BINARY_DIR}/lib/Target/AMDGPU
+  )
+
 set(LLVM_LINK_COMPONENTS
   AMDGPUCodeGen
   AMDGPUDesc
   AMDGPUInfo
+  CodeGen
+  Core
   MC
   Support
   TargetParser
   )
 
-add_llvm_unittest(AMDGPUDwarfTests
+add_llvm_unittest(AMDGPUMCTests
   DwarfRegMappings.cpp
+  SIProgramInfoMCExprs.cpp
   )

diff  --git a/llvm/unittests/MC/AMDGPU/SIProgramInfoMCExprs.cpp b/llvm/unittests/MC/AMDGPU/SIProgramInfoMCExprs.cpp
new file mode 100644
index 0000000000000..f2161f71e6e99
--- /dev/null
+++ b/llvm/unittests/MC/AMDGPU/SIProgramInfoMCExprs.cpp
@@ -0,0 +1,81 @@
+//===- llvm/unittests/MC/AMDGPU/SIProgramInfoMCExprs.cpp ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUHSAMetadataStreamer.h"
+#include "AMDGPUTargetMachine.h"
+#include "GCNSubtarget.h"
+#include "SIProgramInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+class SIProgramInfoMCExprsTest : public testing::Test {
+protected:
+  std::unique_ptr<GCNTargetMachine> TM;
+  std::unique_ptr<LLVMContext> Ctx;
+  std::unique_ptr<GCNSubtarget> ST;
+  std::unique_ptr<MachineModuleInfo> MMI;
+  std::unique_ptr<MachineFunction> MF;
+  std::unique_ptr<Module> M;
+
+  SIProgramInfo PI;
+
+  static void SetUpTestSuite() {
+    LLVMInitializeAMDGPUTargetInfo();
+    LLVMInitializeAMDGPUTarget();
+    LLVMInitializeAMDGPUTargetMC();
+  }
+
+  SIProgramInfoMCExprsTest() {
+    std::string Triple = "amdgcn-amd-amdhsa";
+    std::string CPU = "gfx1010";
+    std::string FS = "";
+
+    std::string Error;
+    const Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error);
+    TargetOptions Options;
+
+    TM.reset(static_cast<GCNTargetMachine *>(TheTarget->createTargetMachine(
+        Triple, CPU, FS, Options, std::nullopt, std::nullopt)));
+
+    Ctx = std::make_unique<LLVMContext>();
+    M = std::make_unique<Module>("Module", *Ctx);
+    M->setDataLayout(TM->createDataLayout());
+    auto *FType = FunctionType::get(Type::getVoidTy(*Ctx), false);
+    auto *F = Function::Create(FType, GlobalValue::ExternalLinkage, "Test", *M);
+    MMI = std::make_unique<MachineModuleInfo>(TM.get());
+
+    ST = std::make_unique<GCNSubtarget>(TM->getTargetTriple(),
+                                        TM->getTargetCPU(),
+                                        TM->getTargetFeatureString(), *TM);
+
+    MF = std::make_unique<MachineFunction>(*F, *TM, *ST, 1, *MMI);
+    PI.reset(*MF.get());
+  }
+};
+
+TEST_F(SIProgramInfoMCExprsTest, TestDeathHSAKernelEmit) {
+  MCContext &Ctx = MF->getContext();
+  MCSymbol *Sym = Ctx.getOrCreateSymbol("Unknown");
+  PI.ScratchSize = MCSymbolRefExpr::create(Sym, Ctx);
+
+  auto &Func = MF->getFunction();
+  Func.setCallingConv(CallingConv::AMDGPU_KERNEL);
+  AMDGPU::HSAMD::MetadataStreamerMsgPackV4 MD;
+  EXPECT_DEATH(MD.emitKernel(*MF, PI),
+               "could not resolve expression when required.");
+}