[llvm] [AMDGPU] MCExpr-ify MC layer kernel descriptor (PR #80855)

Janek van Oirschot via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 6 07:46:14 PST 2024


https://github.com/JanekvO created https://github.com/llvm/llvm-project/pull/80855

Kernel descriptor attributes, with their respective emit and asm parse functionality, converted to MCExpr. Required for moving function/program resource usage information propagation to MC layer. As a result of this change, some amdhsa directives in assembly can use asm symbols that are defined later than their use.

Furthermore, this change (unintentionally) fixes a latent bug relating to some uses of the AMDHSA_BITS_SET macro for which the VAL argument is not encapsulated and can result in unintended C++ statements (e.g., see how the macro expands for the tg-split and wavefrontsize32 attributes in getDefaultAmdhsaKernelDescriptor).

>From b3e77d2dcbd35ff442cb24dc907ed88dc42bd023 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Tue, 6 Feb 2024 15:29:26 +0000
Subject: [PATCH] MCExpr-ify MC layer kernel descriptor

---
 .../llvm/Support/AMDHSAKernelDescriptor.h     |  79 ++--
 llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp   |  30 +-
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp      | 188 +++++----
 .../MCTargetDesc/AMDGPUTargetStreamer.cpp     | 381 +++++++++++-------
 .../MCTargetDesc/AMDHSAKernelDescriptor.cpp   |  32 ++
 .../Target/AMDGPU/MCTargetDesc/CMakeLists.txt |   1 +
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    |  84 ++--
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h |   5 +-
 llvm/test/MC/AMDGPU/hsa-gfx12-v4.s            |   6 +-
 llvm/test/MC/AMDGPU/hsa-sym-exprs.s           |  68 ++++
 llvm/test/MC/AMDGPU/hsa-tg-split.s            |  74 ++++
 11 files changed, 641 insertions(+), 307 deletions(-)
 create mode 100644 llvm/lib/Target/AMDGPU/MCTargetDesc/AMDHSAKernelDescriptor.cpp
 create mode 100644 llvm/test/MC/AMDGPU/hsa-sym-exprs.s
 create mode 100644 llvm/test/MC/AMDGPU/hsa-tg-split.s

diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
index 84cac3ef700e05..9c5d8fa1c1a607 100644
--- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
+++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
@@ -52,6 +52,10 @@
 #endif // AMDHSA_BITS_SET
 
 namespace llvm {
+
+class MCContext;
+class MCExpr;
+
 namespace amdhsa {
 
 // Floating point rounding modes. Must match hardware definition.
@@ -238,18 +242,40 @@ enum : int32_t {
 
 // Kernel descriptor. Must be kept backwards compatible.
 struct kernel_descriptor_t {
-  uint32_t group_segment_fixed_size;
-  uint32_t private_segment_fixed_size;
-  uint32_t kernarg_size;
+  const MCExpr *group_segment_fixed_size;
+  const MCExpr *private_segment_fixed_size;
+  const MCExpr *kernarg_size;
   uint8_t reserved0[4];
   int64_t kernel_code_entry_byte_offset;
   uint8_t reserved1[20];
-  uint32_t compute_pgm_rsrc3; // GFX10+ and GFX90A+
-  uint32_t compute_pgm_rsrc1;
-  uint32_t compute_pgm_rsrc2;
-  uint16_t kernel_code_properties;
-  uint16_t kernarg_preload;
+  const MCExpr *compute_pgm_rsrc3; // GFX10+ and GFX90A+
+  const MCExpr *compute_pgm_rsrc1;
+  const MCExpr *compute_pgm_rsrc2;
+  const MCExpr *kernel_code_properties;
+  const MCExpr *kernarg_preload;
   uint8_t reserved3[4];
+
+  static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift,
+                       uint32_t Mask, MCContext &Ctx);
+  static const MCExpr *bits_get(const MCExpr *Src, uint32_t Shift,
+                                uint32_t Mask, MCContext &Ctx);
+};
+
+// Sizes for kernel_descriptor_t properties, should add up to 64.
+enum : uint32_t {
+  SIZEOF_GROUP_SEGMENT_FIXED_SIZE = 4,
+  SIZEOF_PRIVATE_SEGMENT_FIXED_SIZE = 4,
+  SIZEOF_KERNARG_SIZE = 4,
+  SIZEOF_RESERVED0 = 4,
+  SIZEOF_KERNEL_CODE_ENTRY_BYTE_OFFSET = 8,
+  SIZEOF_RESERVED1 = 20,
+  SIZEOF_COMPUTE_PGM_RSRC3 = 4,
+  SIZEOF_COMPUTE_PGM_RSRC1 = 4,
+  SIZEOF_COMPUTE_PGM_RSRC2 = 4,
+  SIZEOF_KERNEL_CODE_PROPERTIES = 2,
+  SIZEOF_KERNARG_PRELOAD = 2,
+  SIZEOF_RESERVED3 = 4,
+  SIZEOF_KERNEL_DESCRIPTOR = 64
 };
 
 enum : uint32_t {
@@ -267,43 +293,6 @@ enum : uint32_t {
   RESERVED3_OFFSET = 60
 };
 
-static_assert(
-    sizeof(kernel_descriptor_t) == 64,
-    "invalid size for kernel_descriptor_t");
-static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) ==
-                  GROUP_SEGMENT_FIXED_SIZE_OFFSET,
-              "invalid offset for group_segment_fixed_size");
-static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) ==
-                  PRIVATE_SEGMENT_FIXED_SIZE_OFFSET,
-              "invalid offset for private_segment_fixed_size");
-static_assert(offsetof(kernel_descriptor_t, kernarg_size) ==
-                  KERNARG_SIZE_OFFSET,
-              "invalid offset for kernarg_size");
-static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET,
-              "invalid offset for reserved0");
-static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) ==
-                  KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET,
-              "invalid offset for kernel_code_entry_byte_offset");
-static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET,
-              "invalid offset for reserved1");
-static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) ==
-                  COMPUTE_PGM_RSRC3_OFFSET,
-              "invalid offset for compute_pgm_rsrc3");
-static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) ==
-                  COMPUTE_PGM_RSRC1_OFFSET,
-              "invalid offset for compute_pgm_rsrc1");
-static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) ==
-                  COMPUTE_PGM_RSRC2_OFFSET,
-              "invalid offset for compute_pgm_rsrc2");
-static_assert(offsetof(kernel_descriptor_t, kernel_code_properties) ==
-                  KERNEL_CODE_PROPERTIES_OFFSET,
-              "invalid offset for kernel_code_properties");
-static_assert(offsetof(kernel_descriptor_t, kernarg_preload) ==
-                  KERNARG_PRELOAD_OFFSET,
-              "invalid offset for kernarg_preload");
-static_assert(offsetof(kernel_descriptor_t, reserved3) == RESERVED3_OFFSET,
-              "invalid offset for reserved3");
-
 } // end namespace amdhsa
 } // end namespace llvm
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index db81e1ee9e3899..d68c7e499f62c5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -434,24 +434,30 @@ amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
   assert(isUInt<32>(PI.getComputePGMRSrc1(STM)));
   assert(isUInt<32>(PI.getComputePGMRSrc2()));
 
-  KernelDescriptor.group_segment_fixed_size = PI.LDSSize;
-  KernelDescriptor.private_segment_fixed_size = PI.ScratchSize;
+  KernelDescriptor.group_segment_fixed_size =
+      MCConstantExpr::create(PI.LDSSize, MF.getContext());
+  KernelDescriptor.private_segment_fixed_size =
+      MCConstantExpr::create(PI.ScratchSize, MF.getContext());
 
   Align MaxKernArgAlign;
-  KernelDescriptor.kernarg_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
+  KernelDescriptor.kernarg_size = MCConstantExpr::create(
+      STM.getKernArgSegmentSize(F, MaxKernArgAlign), MF.getContext());
 
-  KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1(STM);
-  KernelDescriptor.compute_pgm_rsrc2 = PI.getComputePGMRSrc2();
-  KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);
+  KernelDescriptor.compute_pgm_rsrc1 =
+      MCConstantExpr::create(PI.getComputePGMRSrc1(STM), MF.getContext());
+  KernelDescriptor.compute_pgm_rsrc2 =
+      MCConstantExpr::create(PI.getComputePGMRSrc2(), MF.getContext());
+  KernelDescriptor.kernel_code_properties = MCConstantExpr::create(
+      getAmdhsaKernelCodeProperties(MF), MF.getContext());
 
   assert(STM.hasGFX90AInsts() || CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0);
-  if (STM.hasGFX90AInsts())
-    KernelDescriptor.compute_pgm_rsrc3 =
-      CurrentProgramInfo.ComputePGMRSrc3GFX90A;
+  KernelDescriptor.compute_pgm_rsrc3 = MCConstantExpr::create(
+      STM.hasGFX90AInsts() ? CurrentProgramInfo.ComputePGMRSrc3GFX90A : 0,
+      MF.getContext());
 
-  if (AMDGPU::hasKernargPreload(STM))
-    KernelDescriptor.kernarg_preload =
-        static_cast<uint16_t>(Info->getNumKernargPreloadedSGPRs());
+  KernelDescriptor.kernarg_preload = MCConstantExpr::create(
+      AMDGPU::hasKernargPreload(STM) ? Info->getNumKernargPreloadedSGPRs() : 0,
+      MF.getContext());
 
   return KernelDescriptor;
 }
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 225e781588668f..2331af628fb730 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5236,7 +5236,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
   if (getParser().parseIdentifier(KernelName))
     return true;
 
-  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
+  kernel_descriptor_t KD =
+      getDefaultAmdhsaKernelDescriptor(&getSTI(), getContext());
 
   StringSet<> Seen;
 
@@ -5276,89 +5277,107 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
       return TokError(".amdhsa_ directives cannot be repeated");
 
     SMLoc ValStart = getLoc();
-    int64_t IVal;
-    if (getParser().parseAbsoluteExpression(IVal))
+    const MCExpr *ExprVal;
+    if (getParser().parseExpression(ExprVal))
       return true;
     SMLoc ValEnd = getLoc();
     SMRange ValRange = SMRange(ValStart, ValEnd);
 
-    if (IVal < 0)
-      return OutOfRangeError(ValRange);
-
+    int64_t IVal = 0;
     uint64_t Val = IVal;
+    bool EvaluatableExpr;
+    if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
+      if (IVal < 0)
+        return OutOfRangeError(ValRange);
+      Val = IVal;
+    }
 
 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
-  if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
+  if (!isUInt<ENTRY##_WIDTH>(Val))                                             \
     return OutOfRangeError(RANGE);                                             \
-  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
+  kernel_descriptor_t::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY,            \
+                                getContext());
+
+#define EXPR_SHOULD_RESOLVE()                                                  \
+  if (!EvaluatableExpr)                                                        \
+    return Error(IDRange.Start, "directive should have resolvable expression", \
+                 IDRange);
 
     if (ID == ".amdhsa_group_segment_fixed_size") {
-      if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
+      if (!isUInt<SIZEOF_GROUP_SEGMENT_FIXED_SIZE * CHAR_BIT>(Val))
         return OutOfRangeError(ValRange);
-      KD.group_segment_fixed_size = Val;
+      KD.group_segment_fixed_size = ExprVal;
     } else if (ID == ".amdhsa_private_segment_fixed_size") {
-      if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
+      if (!isUInt<SIZEOF_PRIVATE_SEGMENT_FIXED_SIZE * CHAR_BIT>(Val))
         return OutOfRangeError(ValRange);
-      KD.private_segment_fixed_size = Val;
+      KD.private_segment_fixed_size = ExprVal;
     } else if (ID == ".amdhsa_kernarg_size") {
-      if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
+      if (!isUInt<SIZEOF_KERNARG_SIZE * CHAR_BIT>(Val))
         return OutOfRangeError(ValRange);
-      KD.kernarg_size = Val;
+      KD.kernarg_size = ExprVal;
     } else if (ID == ".amdhsa_user_sgpr_count") {
+      EXPR_SHOULD_RESOLVE();
       ExplicitUserSGPRCount = Val;
     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
+      EXPR_SHOULD_RESOLVE();
       if (hasArchitectedFlatScratch())
         return Error(IDRange.Start,
                      "directive is not supported with architected flat scratch",
                      IDRange);
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
-                       Val, ValRange);
+                       ExprVal, ValRange);
       if (Val)
         ImpliedUserSGPRCount += 4;
     } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
+      EXPR_SHOULD_RESOLVE();
       if (!hasKernargPreload())
         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
 
       if (Val > getMaxNumUserSGPRs())
         return OutOfRangeError(ValRange);
-      PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, Val,
+      PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
                        ValRange);
       if (Val) {
         ImpliedUserSGPRCount += Val;
         PreloadLength = Val;
       }
     } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
+      EXPR_SHOULD_RESOLVE();
       if (!hasKernargPreload())
         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
 
       if (Val >= 1024)
         return OutOfRangeError(ValRange);
-      PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, Val,
+      PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
                        ValRange);
       if (Val)
         PreloadOffset = Val;
     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
+      EXPR_SHOULD_RESOLVE();
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
-                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
+                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
                        ValRange);
       if (Val)
         ImpliedUserSGPRCount += 2;
     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
+      EXPR_SHOULD_RESOLVE();
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
-                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
+                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
                        ValRange);
       if (Val)
         ImpliedUserSGPRCount += 2;
     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
+      EXPR_SHOULD_RESOLVE();
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
-                       Val, ValRange);
+                       ExprVal, ValRange);
       if (Val)
         ImpliedUserSGPRCount += 2;
     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
+      EXPR_SHOULD_RESOLVE();
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
-                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
+                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
                        ValRange);
       if (Val)
         ImpliedUserSGPRCount += 2;
@@ -5367,34 +5386,39 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
         return Error(IDRange.Start,
                      "directive is not supported with architected flat scratch",
                      IDRange);
+      EXPR_SHOULD_RESOLVE();
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
-                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
-                       ValRange);
+                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
+                       ExprVal, ValRange);
       if (Val)
         ImpliedUserSGPRCount += 2;
     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
+      EXPR_SHOULD_RESOLVE();
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
-                       Val, ValRange);
+                       ExprVal, ValRange);
       if (Val)
         ImpliedUserSGPRCount += 1;
     } else if (ID == ".amdhsa_wavefront_size32") {
+      EXPR_SHOULD_RESOLVE();
       if (IVersion.Major < 10)
         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
       EnableWavefrontSize32 = Val;
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
-                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
-                       Val, ValRange);
+                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
+                       ValRange);
     } else if (ID == ".amdhsa_uses_dynamic_stack") {
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
-                       KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
+                       KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
+                       ValRange);
     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
       if (hasArchitectedFlatScratch())
         return Error(IDRange.Start,
                      "directive is not supported with architected flat scratch",
                      IDRange);
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
-                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
+                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
+                       ValRange);
     } else if (ID == ".amdhsa_enable_private_segment") {
       if (!hasArchitectedFlatScratch())
         return Error(
@@ -5402,42 +5426,48 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
             "directive is not supported without architected flat scratch",
             IDRange);
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
-                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
+                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
+                       ValRange);
     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
-                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
+                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
                        ValRange);
     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
-                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
+                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
                        ValRange);
     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
-                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
+                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
                        ValRange);
     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
-                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
+                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
                        ValRange);
     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
-                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
+                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
                        ValRange);
     } else if (ID == ".amdhsa_next_free_vgpr") {
+      EXPR_SHOULD_RESOLVE();
       VGPRRange = ValRange;
       NextFreeVGPR = Val;
     } else if (ID == ".amdhsa_next_free_sgpr") {
+      EXPR_SHOULD_RESOLVE();
       SGPRRange = ValRange;
       NextFreeSGPR = Val;
     } else if (ID == ".amdhsa_accum_offset") {
       if (!isGFX90A())
         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
+      EXPR_SHOULD_RESOLVE();
       AccumOffset = Val;
     } else if (ID == ".amdhsa_reserve_vcc") {
+      EXPR_SHOULD_RESOLVE();
       if (!isUInt<1>(Val))
         return OutOfRangeError(ValRange);
       ReserveVCC = Val;
     } else if (ID == ".amdhsa_reserve_flat_scratch") {
+      EXPR_SHOULD_RESOLVE();
       if (IVersion.Major < 7)
         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
       if (hasArchitectedFlatScratch())
@@ -5457,97 +5487,105 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
                                  IDRange);
     } else if (ID == ".amdhsa_float_round_mode_32") {
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
-                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
+                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
+                       ValRange);
     } else if (ID == ".amdhsa_float_round_mode_16_64") {
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
-                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
+                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
+                       ValRange);
     } else if (ID == ".amdhsa_float_denorm_mode_32") {
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
-                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
+                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
+                       ValRange);
     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
-                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
+                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
                        ValRange);
     } else if (ID == ".amdhsa_dx10_clamp") {
       if (IVersion.Major >= 12)
         return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
-                       COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Val,
+                       COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
                        ValRange);
     } else if (ID == ".amdhsa_ieee_mode") {
       if (IVersion.Major >= 12)
         return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
-                       COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Val,
+                       COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
                        ValRange);
     } else if (ID == ".amdhsa_fp16_overflow") {
       if (IVersion.Major < 9)
         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
-      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val,
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+                       COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
                        ValRange);
     } else if (ID == ".amdhsa_tg_split") {
       if (!isGFX90A())
         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
-      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
-                       ValRange);
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
+                       ExprVal, ValRange);
     } else if (ID == ".amdhsa_workgroup_processor_mode") {
       if (IVersion.Major < 10)
         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
-      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Val,
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+                       COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
                        ValRange);
     } else if (ID == ".amdhsa_memory_ordered") {
       if (IVersion.Major < 10)
         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
-      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Val,
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+                       COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
                        ValRange);
     } else if (ID == ".amdhsa_forward_progress") {
       if (IVersion.Major < 10)
         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
-      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Val,
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+                       COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
                        ValRange);
     } else if (ID == ".amdhsa_shared_vgpr_count") {
+      EXPR_SHOULD_RESOLVE();
       if (IVersion.Major < 10 || IVersion.Major >= 12)
         return Error(IDRange.Start, "directive requires gfx10 or gfx11",
                      IDRange);
       SharedVGPRCount = Val;
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
-                       COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, Val,
+                       COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
                        ValRange);
     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
       PARSE_BITS_ENTRY(
           KD.compute_pgm_rsrc2,
-          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
-          ValRange);
+          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
+          ExprVal, ValRange);
     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
-                       Val, ValRange);
+                       ExprVal, ValRange);
     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
       PARSE_BITS_ENTRY(
           KD.compute_pgm_rsrc2,
-          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
-          ValRange);
+          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
+          ExprVal, ValRange);
     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
-                       Val, ValRange);
+                       ExprVal, ValRange);
     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
-                       Val, ValRange);
+                       ExprVal, ValRange);
     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
-                       Val, ValRange);
+                       ExprVal, ValRange);
     } else if (ID == ".amdhsa_exception_int_div_zero") {
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
-                       Val, ValRange);
+                       ExprVal, ValRange);
     } else if (ID == ".amdhsa_round_robin_scheduling") {
       if (IVersion.Major < 12)
         return Error(IDRange.Start, "directive requires gfx12+", IDRange);
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
-                       COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, Val,
+                       COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
                        ValRange);
     } else {
       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
@@ -5574,15 +5612,18 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
           VGPRBlocks))
     return OutOfRangeError(VGPRRange);
-  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
-                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
+  kernel_descriptor_t::bits_set(
+      KD.compute_pgm_rsrc1, MCConstantExpr::create(VGPRBlocks, getContext()),
+      COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
+      COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
 
   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
           SGPRBlocks))
     return OutOfRangeError(SGPRRange);
-  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
-                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
-                  SGPRBlocks);
+  kernel_descriptor_t::bits_set(
+      KD.compute_pgm_rsrc1, MCConstantExpr::create(SGPRBlocks, getContext()),
+      COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
+      COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
 
   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
     return TokError("amdgpu_user_sgpr_count smaller than than implied by "
@@ -5593,11 +5634,17 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
 
   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
     return TokError("too many user SGPRs enabled");
-  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
-                  UserSGPRCount);
-
-  if (PreloadLength && KD.kernarg_size &&
-      (PreloadLength * 4 + PreloadOffset * 4 > KD.kernarg_size))
+  kernel_descriptor_t::bits_set(
+      KD.compute_pgm_rsrc2, MCConstantExpr::create(UserSGPRCount, getContext()),
+      COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
+      COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
+
+  int64_t IVal = 0;
+  if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
+    return TokError("Kernarg size should be resolvable");
+  uint64_t kernarg_size = IVal;
+  if (PreloadLength && kernarg_size &&
+      (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
     return TokError("Kernarg preload length + offset is larger than the "
                     "kernarg segment size");
 
@@ -5609,8 +5656,11 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
                       "increments of 4");
     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
       return TokError("accum_offset exceeds total VGPR allocation");
-    AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
-                    (AccumOffset / 4 - 1));
+    kernel_descriptor_t::bits_set(
+        KD.compute_pgm_rsrc3,
+        MCConstantExpr::create(AccumOffset / 4 - 1, getContext()),
+        COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
+        COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
   }
 
   if (IVersion.Major >= 10 && IVersion.Major < 12) {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 5e9b1674d87dcb..b2d0657e49f072 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -302,91 +302,142 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
     const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
     bool ReserveVCC, bool ReserveFlatScr) {
   IsaVersion IVersion = getIsaVersion(STI.getCPU());
+  const MCAsmInfo *MAI = getContext().getAsmInfo();
 
   OS << "\t.amdhsa_kernel " << KernelName << '\n';
 
-#define PRINT_FIELD(STREAM, DIRECTIVE, KERNEL_DESC, MEMBER_NAME, FIELD_NAME)   \
-  STREAM << "\t\t" << DIRECTIVE << " "                                         \
-         << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n';
-
-  OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size
-     << '\n';
-  OS << "\t\t.amdhsa_private_segment_fixed_size "
-     << KD.private_segment_fixed_size << '\n';
-  OS << "\t\t.amdhsa_kernarg_size " << KD.kernarg_size << '\n';
-
-  PRINT_FIELD(OS, ".amdhsa_user_sgpr_count", KD,
-              compute_pgm_rsrc2,
-              amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT);
+  auto print_field = [&](const MCExpr *Expr, uint32_t Shift, uint32_t Mask,
+                         StringRef Directive) {
+    int64_t IVal;
+    OS << "\t\t" << Directive << " ";
+    const MCExpr *pgm_rsrc1_bits =
+        amdhsa::kernel_descriptor_t::bits_get(Expr, Shift, Mask, getContext());
+    if (pgm_rsrc1_bits->evaluateAsAbsolute(IVal)) {
+      OS << static_cast<uint64_t>(IVal);
+    } else {
+      pgm_rsrc1_bits->print(OS, MAI);
+    }
+    OS << '\n';
+  };
+
+  OS << "\t\t.amdhsa_group_segment_fixed_size ";
+  KD.group_segment_fixed_size->print(OS, MAI);
+  OS << '\n';
+
+  OS << "\t\t.amdhsa_private_segment_fixed_size ";
+  KD.private_segment_fixed_size->print(OS, MAI);
+  OS << '\n';
+
+  OS << "\t\t.amdhsa_kernarg_size ";
+  KD.kernarg_size->print(OS, MAI);
+  OS << '\n';
+
+  print_field(
+      KD.compute_pgm_rsrc2, amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
+      amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, ".amdhsa_user_sgpr_count");
 
   if (!hasArchitectedFlatScratch(STI))
-    PRINT_FIELD(
-        OS, ".amdhsa_user_sgpr_private_segment_buffer", KD,
-        kernel_code_properties,
-        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
-  PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD,
-              kernel_code_properties,
-              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
-  PRINT_FIELD(OS, ".amdhsa_user_sgpr_queue_ptr", KD,
-              kernel_code_properties,
-              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
-  PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD,
-              kernel_code_properties,
-              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
-  PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_id", KD,
-              kernel_code_properties,
-              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
+    print_field(
+        KD.kernel_code_properties,
+        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
+        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
+        ".amdhsa_user_sgpr_private_segment_buffer");
+  print_field(KD.kernel_code_properties,
+              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
+              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR,
+              ".amdhsa_user_sgpr_dispatch_ptr");
+  print_field(KD.kernel_code_properties,
+              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
+              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR,
+              ".amdhsa_user_sgpr_queue_ptr");
+  print_field(
+      KD.kernel_code_properties,
+      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
+      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
+      ".amdhsa_user_sgpr_kernarg_segment_ptr");
+  print_field(KD.kernel_code_properties,
+              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
+              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID,
+              ".amdhsa_user_sgpr_dispatch_id");
   if (!hasArchitectedFlatScratch(STI))
-    PRINT_FIELD(OS, ".amdhsa_user_sgpr_flat_scratch_init", KD,
-                kernel_code_properties,
-                amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
+    print_field(
+        KD.kernel_code_properties,
+        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
+        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
+        ".amdhsa_user_sgpr_flat_scratch_init");
   if (hasKernargPreload(STI)) {
-    PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_preload_length ", KD,
-                kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH);
-    PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_preload_offset ", KD,
-                kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET);
+    print_field(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH_SHIFT,
+                amdhsa::KERNARG_PRELOAD_SPEC_LENGTH,
+                ".amdhsa_user_sgpr_kernarg_preload_length");
+    print_field(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET_SHIFT,
+                amdhsa::KERNARG_PRELOAD_SPEC_OFFSET,
+                ".amdhsa_user_sgpr_kernarg_preload_offset");
   }
-  PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD,
-              kernel_code_properties,
-              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
+  print_field(
+      KD.kernel_code_properties,
+      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
+      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
+      ".amdhsa_user_sgpr_private_segment_size");
   if (IVersion.Major >= 10)
-    PRINT_FIELD(OS, ".amdhsa_wavefront_size32", KD,
-                kernel_code_properties,
-                amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
+    print_field(KD.kernel_code_properties,
+                amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
+                amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
+                ".amdhsa_wavefront_size32");
   if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
-    PRINT_FIELD(OS, ".amdhsa_uses_dynamic_stack", KD, kernel_code_properties,
-                amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
-  PRINT_FIELD(OS,
+    print_field(KD.kernel_code_properties,
+                amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT,
+                amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK,
+                ".amdhsa_uses_dynamic_stack");
+  print_field(KD.compute_pgm_rsrc2,
+              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT,
+              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT,
               (hasArchitectedFlatScratch(STI)
                    ? ".amdhsa_enable_private_segment"
-                   : ".amdhsa_system_sgpr_private_segment_wavefront_offset"),
-              KD, compute_pgm_rsrc2,
-              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
-  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD,
-              compute_pgm_rsrc2,
-              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
-  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD,
-              compute_pgm_rsrc2,
-              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
-  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD,
-              compute_pgm_rsrc2,
-              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
-  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_info", KD,
-              compute_pgm_rsrc2,
-              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
-  PRINT_FIELD(OS, ".amdhsa_system_vgpr_workitem_id", KD,
-              compute_pgm_rsrc2,
-              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
+                   : ".amdhsa_system_sgpr_private_segment_wavefront_offset"));
+  print_field(KD.compute_pgm_rsrc2,
+              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,
+              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X,
+              ".amdhsa_system_sgpr_workgroup_id_x");
+  print_field(KD.compute_pgm_rsrc2,
+              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT,
+              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y,
+              ".amdhsa_system_sgpr_workgroup_id_y");
+  print_field(KD.compute_pgm_rsrc2,
+              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT,
+              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z,
+              ".amdhsa_system_sgpr_workgroup_id_z");
+  print_field(KD.compute_pgm_rsrc2,
+              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT,
+              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO,
+              ".amdhsa_system_sgpr_workgroup_info");
+  print_field(KD.compute_pgm_rsrc2,
+              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT,
+              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID,
+              ".amdhsa_system_vgpr_workitem_id");
 
   // These directives are required.
   OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n';
   OS << "\t\t.amdhsa_next_free_sgpr " << NextSGPR << '\n';
 
-  if (AMDGPU::isGFX90A(STI))
-    OS << "\t\t.amdhsa_accum_offset " <<
-      (AMDHSA_BITS_GET(KD.compute_pgm_rsrc3,
-                       amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
-      << '\n';
+  if (AMDGPU::isGFX90A(STI)) {
+    // MCExpr equivalent of taking the (accum_offset + 1) * 4.
+    const MCExpr *accum_bits = amdhsa::kernel_descriptor_t::bits_get(
+        KD.compute_pgm_rsrc3,
+        amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
+        amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
+    accum_bits = MCBinaryExpr::createAdd(
+        accum_bits, MCConstantExpr::create(1, getContext()), getContext());
+    accum_bits = MCBinaryExpr::createMul(
+        accum_bits, MCConstantExpr::create(4, getContext()), getContext());
+    OS << "\t\t.amdhsa_accum_offset ";
+    int64_t IVal;
+    if (accum_bits->evaluateAsAbsolute(IVal)) {
+      OS << static_cast<uint64_t>(IVal);
+    } else {
+      accum_bits->print(OS, MAI);
+    }
+    OS << '\n';
+  }
 
   if (!ReserveVCC)
     OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n';
@@ -403,74 +454,105 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
     break;
   }
 
-  PRINT_FIELD(OS, ".amdhsa_float_round_mode_32", KD,
-              compute_pgm_rsrc1,
-              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
-  PRINT_FIELD(OS, ".amdhsa_float_round_mode_16_64", KD,
-              compute_pgm_rsrc1,
-              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
-  PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_32", KD,
-              compute_pgm_rsrc1,
-              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
-  PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_16_64", KD,
-              compute_pgm_rsrc1,
-              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
+  print_field(KD.compute_pgm_rsrc1,
+              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT,
+              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32,
+              ".amdhsa_float_round_mode_32");
+  print_field(KD.compute_pgm_rsrc1,
+              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT,
+              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64,
+              ".amdhsa_float_round_mode_16_64");
+  print_field(KD.compute_pgm_rsrc1,
+              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT,
+              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32,
+              ".amdhsa_float_denorm_mode_32");
+  print_field(KD.compute_pgm_rsrc1,
+              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
+              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
+              ".amdhsa_float_denorm_mode_16_64");
   if (IVersion.Major < 12) {
-    PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD, compute_pgm_rsrc1,
-                amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
-    PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD, compute_pgm_rsrc1,
-                amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
+    print_field(KD.compute_pgm_rsrc1,
+                amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
+                amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP,
+                ".amdhsa_dx10_clamp");
+    print_field(KD.compute_pgm_rsrc1,
+                amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
+                amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE,
+                ".amdhsa_ieee_mode");
+  }
+  if (IVersion.Major >= 9) {
+    print_field(KD.compute_pgm_rsrc1,
+                amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT,
+                amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL,
+                ".amdhsa_fp16_overflow");
   }
-  if (IVersion.Major >= 9)
-    PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
-                compute_pgm_rsrc1,
-                amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
   if (AMDGPU::isGFX90A(STI))
-    PRINT_FIELD(OS, ".amdhsa_tg_split", KD,
-                compute_pgm_rsrc3,
-                amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
+    print_field(KD.compute_pgm_rsrc3,
+                amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
+                amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, ".amdhsa_tg_split");
   if (IVersion.Major >= 10) {
-    PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD,
-                compute_pgm_rsrc1,
-                amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
-    PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD,
-                compute_pgm_rsrc1,
-                amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
-    PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
-                compute_pgm_rsrc1,
-                amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
+    print_field(KD.compute_pgm_rsrc1,
+                amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT,
+                amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
+                ".amdhsa_workgroup_processor_mode");
+    print_field(KD.compute_pgm_rsrc1,
+                amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,
+                amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED,
+                ".amdhsa_memory_ordered");
+    print_field(KD.compute_pgm_rsrc1,
+                amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT,
+                amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS,
+                ".amdhsa_forward_progress");
   }
   if (IVersion.Major >= 10 && IVersion.Major < 12) {
-    PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3,
-                amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
+    print_field(KD.compute_pgm_rsrc3,
+                amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT,
+                amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT,
+                ".amdhsa_shared_vgpr_count");
+  }
+  if (IVersion.Major >= 12) {
+    print_field(KD.compute_pgm_rsrc1,
+                amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT,
+                amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN,
+                ".amdhsa_round_robin_scheduling");
   }
-  if (IVersion.Major >= 12)
-    PRINT_FIELD(OS, ".amdhsa_round_robin_scheduling", KD, compute_pgm_rsrc1,
-                amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
-  PRINT_FIELD(
-      OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
-      compute_pgm_rsrc2,
-      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
-  PRINT_FIELD(OS, ".amdhsa_exception_fp_denorm_src", KD,
-              compute_pgm_rsrc2,
-              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
-  PRINT_FIELD(
-      OS, ".amdhsa_exception_fp_ieee_div_zero", KD,
-      compute_pgm_rsrc2,
-      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
-  PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_overflow", KD,
-              compute_pgm_rsrc2,
-              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
-  PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_underflow", KD,
-              compute_pgm_rsrc2,
-              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
-  PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_inexact", KD,
-              compute_pgm_rsrc2,
-              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
-  PRINT_FIELD(OS, ".amdhsa_exception_int_div_zero", KD,
-              compute_pgm_rsrc2,
-              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
-#undef PRINT_FIELD
+  print_field(
+      KD.compute_pgm_rsrc2,
+      amdhsa::
+          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT,
+      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
+      ".amdhsa_exception_fp_ieee_invalid_op");
+  print_field(
+      KD.compute_pgm_rsrc2,
+      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT,
+      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
+      ".amdhsa_exception_fp_denorm_src");
+  print_field(
+      KD.compute_pgm_rsrc2,
+      amdhsa::
+          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT,
+      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
+      ".amdhsa_exception_fp_ieee_div_zero");
+  print_field(
+      KD.compute_pgm_rsrc2,
+      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT,
+      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
+      ".amdhsa_exception_fp_ieee_overflow");
+  print_field(
+      KD.compute_pgm_rsrc2,
+      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT,
+      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
+      ".amdhsa_exception_fp_ieee_underflow");
+  print_field(
+      KD.compute_pgm_rsrc2,
+      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT,
+      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
+      ".amdhsa_exception_fp_ieee_inexact");
+  print_field(
+      KD.compute_pgm_rsrc2,
+      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT,
+      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
+      ".amdhsa_exception_int_div_zero");
 
   OS << "\t.end_amdhsa_kernel\n";
 }
@@ -819,7 +901,7 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
   // Kernel descriptor symbol's type and size are fixed.
   KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
   KernelDescriptorSymbol->setSize(
-      MCConstantExpr::create(sizeof(KernelDescriptor), Context));
+      MCConstantExpr::create(amdhsa::SIZEOF_KERNEL_DESCRIPTOR, Context));
 
   // The visibility of the kernel code symbol must be protected or less to allow
   // static relocations from the kernel descriptor to be used.
@@ -827,9 +909,12 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
     KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED);
 
   Streamer.emitLabel(KernelDescriptorSymbol);
-  Streamer.emitInt32(KernelDescriptor.group_segment_fixed_size);
-  Streamer.emitInt32(KernelDescriptor.private_segment_fixed_size);
-  Streamer.emitInt32(KernelDescriptor.kernarg_size);
+  Streamer.emitValue(KernelDescriptor.group_segment_fixed_size,
+                     amdhsa::SIZEOF_GROUP_SEGMENT_FIXED_SIZE);
+  Streamer.emitValue(KernelDescriptor.private_segment_fixed_size,
+                     amdhsa::SIZEOF_PRIVATE_SEGMENT_FIXED_SIZE);
+  Streamer.emitValue(KernelDescriptor.kernarg_size,
+                     amdhsa::SIZEOF_KERNARG_SIZE);
 
   for (uint8_t Res : KernelDescriptor.reserved0)
     Streamer.emitInt8(Res);
@@ -838,20 +923,26 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
   // expression being created is:
   //   (start of kernel code) - (start of kernel descriptor)
   // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
-  Streamer.emitValue(MCBinaryExpr::createSub(
-      MCSymbolRefExpr::create(
-          KernelCodeSymbol, MCSymbolRefExpr::VK_AMDGPU_REL64, Context),
-      MCSymbolRefExpr::create(
-          KernelDescriptorSymbol, MCSymbolRefExpr::VK_None, Context),
-      Context),
-      sizeof(KernelDescriptor.kernel_code_entry_byte_offset));
+  Streamer.emitValue(
+      MCBinaryExpr::createSub(
+          MCSymbolRefExpr::create(KernelCodeSymbol,
+                                  MCSymbolRefExpr::VK_AMDGPU_REL64, Context),
+          MCSymbolRefExpr::create(KernelDescriptorSymbol,
+                                  MCSymbolRefExpr::VK_None, Context),
+          Context),
+      amdhsa::SIZEOF_KERNEL_CODE_ENTRY_BYTE_OFFSET);
   for (uint8_t Res : KernelDescriptor.reserved1)
     Streamer.emitInt8(Res);
-  Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc3);
-  Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc1);
-  Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc2);
-  Streamer.emitInt16(KernelDescriptor.kernel_code_properties);
-  Streamer.emitInt16(KernelDescriptor.kernarg_preload);
+  Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc3,
+                     amdhsa::SIZEOF_COMPUTE_PGM_RSRC3);
+  Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc1,
+                     amdhsa::SIZEOF_COMPUTE_PGM_RSRC1);
+  Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc2,
+                     amdhsa::SIZEOF_COMPUTE_PGM_RSRC2);
+  Streamer.emitValue(KernelDescriptor.kernel_code_properties,
+                     amdhsa::SIZEOF_KERNEL_CODE_PROPERTIES);
+  Streamer.emitValue(KernelDescriptor.kernarg_preload,
+                     amdhsa::SIZEOF_KERNARG_PRELOAD);
   for (uint8_t Res : KernelDescriptor.reserved3)
     Streamer.emitInt8(Res);
 }
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDHSAKernelDescriptor.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDHSAKernelDescriptor.cpp
new file mode 100644
index 00000000000000..905723b7325bc8
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDHSAKernelDescriptor.cpp
@@ -0,0 +1,32 @@
+//===--- AMDHSAKernelDescriptor.h -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/AMDHSAKernelDescriptor.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+
+using namespace llvm;
+using namespace llvm::amdhsa;
+
+void kernel_descriptor_t::bits_set(const MCExpr *&Dst, const MCExpr *Value,
+                                   uint32_t Shift, uint32_t Mask,
+                                   MCContext &Ctx) {
+  auto Sft = MCConstantExpr::create(Shift, Ctx);
+  auto Msk = MCConstantExpr::create(Mask, Ctx);
+  Dst = MCBinaryExpr::createAnd(Dst, MCUnaryExpr::createNot(Msk, Ctx), Ctx);
+  Dst = MCBinaryExpr::createOr(Dst, MCBinaryExpr::createShl(Value, Sft, Ctx),
+                               Ctx);
+}
+
+const MCExpr *kernel_descriptor_t::bits_get(const MCExpr *Src, uint32_t Shift,
+                                            uint32_t Mask, MCContext &Ctx) {
+  auto Sft = MCConstantExpr::create(Shift, Ctx);
+  auto Msk = MCConstantExpr::create(Mask, Ctx);
+  return MCBinaryExpr::createLShr(MCBinaryExpr::createAnd(Src, Msk, Ctx), Sft,
+                                  Ctx);
+}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
index 5dc76071b0594e..51df7bb3ebe840 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
@@ -7,6 +7,7 @@ add_llvm_component_library(LLVMAMDGPUDesc
   AMDGPUMCCodeEmitter.cpp
   AMDGPUMCTargetDesc.cpp
   AMDGPUTargetStreamer.cpp
+  AMDHSAKernelDescriptor.cpp
   R600InstPrinter.cpp
   R600MCCodeEmitter.cpp
   R600MCTargetDesc.cpp
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 33335ac75df768..4b37fc7569aa15 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -19,6 +19,7 @@
 #include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/IntrinsicsR600.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
@@ -1186,44 +1187,65 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
   }
 }
 
-amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
-    const MCSubtargetInfo *STI) {
+amdhsa::kernel_descriptor_t
+getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx) {
   IsaVersion Version = getIsaVersion(STI->getCPU());
 
   amdhsa::kernel_descriptor_t KD;
   memset(&KD, 0, sizeof(KD));
-
-  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
-                  amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
-                  amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
-  if (Version.Major >= 12) {
-    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
-                    amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, 0);
-    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
-                    amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_DISABLE_PERF, 0);
-  } else {
-    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
-                    amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, 1);
-    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
-                    amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, 1);
+  const MCExpr *ZeroMCExpr = MCConstantExpr::create(0, Ctx);
+  const MCExpr *OneMCExpr = MCConstantExpr::create(1, Ctx);
+
+  KD.group_segment_fixed_size = ZeroMCExpr;
+  KD.private_segment_fixed_size = ZeroMCExpr;
+  KD.compute_pgm_rsrc1 = ZeroMCExpr;
+  KD.compute_pgm_rsrc2 = ZeroMCExpr;
+  KD.compute_pgm_rsrc3 = ZeroMCExpr;
+  KD.kernarg_size = ZeroMCExpr;
+  KD.kernel_code_properties = ZeroMCExpr;
+  KD.kernarg_preload = ZeroMCExpr;
+
+  amdhsa::kernel_descriptor_t::bits_set(
+      KD.compute_pgm_rsrc1,
+      MCConstantExpr::create(amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE, Ctx),
+      amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
+      amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Ctx);
+  if (Version.Major < 12) {
+    amdhsa::kernel_descriptor_t::bits_set(
+        KD.compute_pgm_rsrc1, OneMCExpr,
+        amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
+        amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Ctx);
+    amdhsa::kernel_descriptor_t::bits_set(
+        KD.compute_pgm_rsrc1, OneMCExpr,
+        amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
+        amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Ctx);
   }
-  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
-                  amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
+  amdhsa::kernel_descriptor_t::bits_set(
+      KD.compute_pgm_rsrc2, OneMCExpr,
+      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,
+      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Ctx);
   if (Version.Major >= 10) {
-    AMDHSA_BITS_SET(KD.kernel_code_properties,
-                    amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
-                    STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
-    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
-                    amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
-                    STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
-    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
-                    amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, 1);
-  }
-  if (AMDGPU::isGFX90A(*STI)) {
-    AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,
-                    amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
-                    STI->getFeatureBits().test(FeatureTgSplit) ? 1 : 0);
+    if (STI->getFeatureBits().test(FeatureWavefrontSize32))
+      amdhsa::kernel_descriptor_t::bits_set(
+          KD.kernel_code_properties, OneMCExpr,
+          amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
+          amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, Ctx);
+    if (!STI->getFeatureBits().test(FeatureCuMode))
+      amdhsa::kernel_descriptor_t::bits_set(
+          KD.compute_pgm_rsrc1, OneMCExpr,
+          amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT,
+          amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Ctx);
+
+    amdhsa::kernel_descriptor_t::bits_set(
+        KD.compute_pgm_rsrc1, OneMCExpr,
+        amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,
+        amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Ctx);
   }
+  if (AMDGPU::isGFX90A(*STI) && STI->getFeatureBits().test(FeatureTgSplit))
+    amdhsa::kernel_descriptor_t::bits_set(
+        KD.compute_pgm_rsrc3, OneMCExpr,
+        amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
+        amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Ctx);
   return KD;
 }
 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index f24b9f0e3615de..0567a2001fbb48 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -26,6 +26,7 @@ struct Align;
 class Argument;
 class Function;
 class GlobalValue;
+class MCContext;
 class MCInstrInfo;
 class MCRegisterClass;
 class MCRegisterInfo;
@@ -798,8 +799,8 @@ unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);
 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                                const MCSubtargetInfo *STI);
 
-amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
-    const MCSubtargetInfo *STI);
+amdhsa::kernel_descriptor_t
+getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx);
 
 bool isGroupSegment(const GlobalValue *GV);
 bool isGlobalSegment(const GlobalValue *GV);
diff --git a/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s b/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s
index 8b90e20bb87d1f..7b591904e877fd 100644
--- a/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s
+++ b/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s
@@ -29,7 +29,7 @@
 // OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
 // OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
 // OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0030 00000c60 80000000 00000000 00000000
+// OBJDUMP-NEXT: 0030 00000c60 80000000 00040000 00000000
 // complete
 // OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
@@ -39,12 +39,12 @@
 // OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
 // OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
 // OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 00b0 00000060 80000000 00000000 00000000
+// OBJDUMP-NEXT: 00b0 00000060 80000000 00040000 00000000
 // disabled_user_sgpr
 // OBJDUMP-NEXT: 00c0 00000000 00000000 00000000 00000000
 // OBJDUMP-NEXT: 00d0 00000000 00000000 00000000 00000000
 // OBJDUMP-NEXT: 00e0 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 00f0 00000c60 80000000 00000000 00000000
+// OBJDUMP-NEXT: 00f0 00000c60 80000000 00040000 00000000
 
 .text
 // ASM: .text
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs.s
new file mode 100644
index 00000000000000..a2764d40655bc0
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs.s
@@ -0,0 +1,68 @@
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck --check-prefix=ASM %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=obj < %s > %t
+// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
+
+// When going from asm -> asm, the expressions should remain the same (i.e., symbolic).
+// When going from asm -> obj, the expressions should get resolved (through fixups),
+
+// OBJDUMP: Contents of section .rodata
+// expr_defined_later
+// OBJDUMP-NEXT: 0000 2b000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000
+// expr_defined
+// OBJDUMP-NEXT: 0040 2d000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0070 0000ac00 80000000 00000000 00000000
+
+.text
+// ASM: .text
+
+.amdhsa_code_object_version 4
+// ASM: .amdhsa_code_object_version 4
+
+.p2align 8
+.type expr_defined_later, at function
+expr_defined_later:
+  s_endpgm
+
+.p2align 8
+.type expr_defined, at function
+expr_defined:
+  s_endpgm
+
+.rodata
+// ASM: .rodata
+
+.p2align 6
+.amdhsa_kernel expr_defined_later
+  .amdhsa_group_segment_fixed_size defined_value+2
+  .amdhsa_next_free_vgpr 0
+  .amdhsa_next_free_sgpr 0
+  .amdhsa_accum_offset 4
+.end_amdhsa_kernel
+
+.set defined_value, 41
+
+.p2align 6
+.amdhsa_kernel expr_defined
+  .amdhsa_group_segment_fixed_size defined_value+4
+  .amdhsa_next_free_vgpr 0
+  .amdhsa_next_free_sgpr 0
+  .amdhsa_accum_offset 4
+.end_amdhsa_kernel
+
+
+
+// ASM: .amdhsa_kernel expr_defined_later
+// ASM: .amdhsa_group_segment_fixed_size defined_value+2
+// ASM: .end_amdhsa_kernel
+
+// ASM:       .set defined_value, 41
+// ASM-NEXT:  .no_dead_strip defined_value
+
+// ASM: .amdhsa_kernel expr_defined
+// ASM: .amdhsa_group_segment_fixed_size 45
+// ASM: .end_amdhsa_kernel
diff --git a/llvm/test/MC/AMDGPU/hsa-tg-split.s b/llvm/test/MC/AMDGPU/hsa-tg-split.s
new file mode 100644
index 00000000000000..5a4d3e2c279c50
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/hsa-tg-split.s
@@ -0,0 +1,74 @@
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack,+tgsplit < %s | FileCheck --check-prefix=ASM %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack,+tgsplit -filetype=obj < %s > %t
+// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
+
+// OBJDUMP: Contents of section .rodata
+// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000100
+// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000
+
+.text
+// ASM: .text
+
+.amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+"
+// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+"
+
+.amdhsa_code_object_version 4
+// ASM: .amdhsa_code_object_version 4
+
+.p2align 8
+.type minimal, at function
+minimal:
+  s_endpgm
+
+.rodata
+// ASM: .rodata
+
+.p2align 6
+.amdhsa_kernel minimal
+  .amdhsa_next_free_vgpr 0
+  .amdhsa_next_free_sgpr 0
+  .amdhsa_accum_offset 4
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel minimal
+// ASM-NEXT: .amdhsa_group_segment_fixed_size 0
+// ASM-NEXT: .amdhsa_private_segment_fixed_size 0
+// ASM-NEXT: .amdhsa_kernarg_size 0
+// ASM-NEXT: .amdhsa_user_sgpr_count 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 0
+// ASM-NEXT: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM-NEXT: .amdhsa_accum_offset 4
+// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
+// ASM-NEXT: .amdhsa_float_round_mode_32 0
+// ASM-NEXT: .amdhsa_float_round_mode_16_64 0
+// ASM-NEXT: .amdhsa_float_denorm_mode_32 0
+// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3
+// ASM-NEXT: .amdhsa_dx10_clamp 1
+// ASM-NEXT: .amdhsa_ieee_mode 1
+// ASM-NEXT: .amdhsa_fp16_overflow 0
+// ASM-NEXT: .amdhsa_tg_split 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+// ASM-NEXT: .amdhsa_exception_fp_denorm_src 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+// ASM-NEXT: .amdhsa_exception_int_div_zero 0
+// ASM-NEXT: .end_amdhsa_kernel



More information about the llvm-commits mailing list