[llvm] MCExpr-ify amd_kernel_code_t (PR #91587)

Janek van Oirschot via llvm-commits llvm-commits at lists.llvm.org
Fri May 17 07:00:08 PDT 2024


https://github.com/JanekvO updated https://github.com/llvm/llvm-project/pull/91587

>From f2ea08b52bf33305571ddf13402672645397d81b Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Thu, 9 May 2024 13:33:47 +0100
Subject: [PATCH 1/5] MCExpr-ify amd_kernel_code_t

---
 llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp   |  56 +-
 llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h     |   5 +-
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp      |  54 +-
 .../MCTargetDesc/AMDGPUMCKernelCodeT.cpp      | 549 ++++++++++++++++++
 .../AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h |  59 ++
 .../MCTargetDesc/AMDGPUTargetStreamer.cpp     |  13 +-
 .../MCTargetDesc/AMDGPUTargetStreamer.h       |   9 +-
 .../Target/AMDGPU/MCTargetDesc/CMakeLists.txt |   2 +
 llvm/lib/Target/AMDGPU/SIDefines.h            |   2 +-
 .../Target/AMDGPU/Utils/AMDKernelCodeTInfo.h  |  24 +-
 llvm/test/MC/AMDGPU/amd_kernel_code_t.s       | 171 ++++++
 11 files changed, 858 insertions(+), 86 deletions(-)
 create mode 100644 llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
 create mode 100644 llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h
 create mode 100644 llvm/test/MC/AMDGPU/amd_kernel_code_t.s

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index de81904143b7b..8343d3d83d22e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -19,10 +19,10 @@
 #include "AMDGPU.h"
 #include "AMDGPUHSAMetadataStreamer.h"
 #include "AMDGPUResourceUsageAnalysis.h"
-#include "AMDKernelCodeT.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUInstPrinter.h"
 #include "MCTargetDesc/AMDGPUMCExpr.h"
+#include "MCTargetDesc/AMDGPUMCKernelCodeT.h"
 #include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
 #include "R600AsmPrinter.h"
@@ -205,8 +205,9 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
   if (STM.isMesaKernel(F) &&
       (F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
        F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
-    amd_kernel_code_t KernelCode;
+    MCKernelCodeT KernelCode;
     getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);
+    KernelCode.validate(&STM, MF->getContext());
     getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
   }
 
@@ -1320,7 +1321,7 @@ static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
   }
 }
 
-void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
+void AMDGPUAsmPrinter::getAmdKernelCode(MCKernelCodeT &Out,
                                         const SIProgramInfo &CurrentProgramInfo,
                                         const MachineFunction &MF) const {
   const Function &F = MF.getFunction();
@@ -1331,59 +1332,62 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
   MCContext &Ctx = MF.getContext();
 
-  AMDGPU::initDefaultAMDKernelCodeT(Out, &STM);
+  AMDGPU::initDefaultAMDKernelCodeT(Out.KernelCode, &STM);
 
-  Out.compute_pgm_resource_registers =
-      CurrentProgramInfo.getComputePGMRSrc1(STM) |
-      (CurrentProgramInfo.getComputePGMRSrc2() << 32);
-  Out.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
+  Out.compute_pgm_resource1_registers =
+      CurrentProgramInfo.getComputePGMRSrc1(STM, Ctx);
+  Out.compute_pgm_resource2_registers =
+      CurrentProgramInfo.getComputePGMRSrc2(Ctx);
+  Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
 
-  if (getMCExprValue(CurrentProgramInfo.DynamicCallStack, Ctx))
-    Out.code_properties |= AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK;
+  {
+    const MCExpr *Shift = MCConstantExpr::create(AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT, Ctx);
+    Out.is_dynamic_callstack = MCBinaryExpr::createShl(
+        CurrentProgramInfo.DynamicCallStack, Shift, Ctx);
+  }
 
-  AMD_HSA_BITS_SET(Out.code_properties,
+  AMD_HSA_BITS_SET(Out.KernelCode.code_properties,
                    AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
                    getElementByteSizeValue(STM.getMaxPrivateElementSize(true)));
 
   const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI->getUserSGPRInfo();
   if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
-    Out.code_properties |=
+    Out.KernelCode.code_properties |=
       AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
   }
 
   if (UserSGPRInfo.hasDispatchPtr())
-    Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+    Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
 
   if (UserSGPRInfo.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5)
-    Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
+    Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
 
   if (UserSGPRInfo.hasKernargSegmentPtr())
-    Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
+    Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
 
   if (UserSGPRInfo.hasDispatchID())
-    Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
+    Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
 
   if (UserSGPRInfo.hasFlatScratchInit())
-    Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
+    Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
 
   if (UserSGPRInfo.hasDispatchPtr())
-    Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+    Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
 
   if (STM.isXNACKEnabled())
-    Out.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
+    Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
 
   Align MaxKernArgAlign;
-  Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
-  Out.wavefront_sgpr_count = getMCExprValue(CurrentProgramInfo.NumSGPR, Ctx);
-  Out.workitem_vgpr_count = getMCExprValue(CurrentProgramInfo.NumVGPR, Ctx);
-  Out.workitem_private_segment_byte_size =
-      getMCExprValue(CurrentProgramInfo.ScratchSize, Ctx);
-  Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
+  Out.KernelCode.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
+  Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
+  Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
+  Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
+  Out.KernelCode.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
 
   // kernarg_segment_alignment is specified as log of the alignment.
   // The minimum alignment is 16.
   // FIXME: The metadata treats the minimum as 4?
-  Out.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign));
+  Out.KernelCode.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign));
 }
 
 bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 16d8952a533ef..c5abbd3c8c084 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -17,8 +17,6 @@
 #include "SIProgramInfo.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 
-struct amd_kernel_code_t;
-
 namespace llvm {
 
 class AMDGPUMachineFunction;
@@ -29,6 +27,7 @@ class MCOperand;
 
 namespace AMDGPU {
 struct MCKernelDescriptor;
+struct MCKernelCodeT;
 namespace HSAMD {
 class MetadataStreamer;
 }
@@ -50,7 +49,7 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
   uint64_t getFunctionCodeSize(const MachineFunction &MF) const;
 
   void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF);
-  void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo,
+  void getAmdKernelCode(AMDGPU::MCKernelCodeT &Out, const SIProgramInfo &KernelInfo,
                         const MachineFunction &MF) const;
 
   /// Emit register usage information so that the GPU driver
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index d47a5f8ebb815..b8bdf816a9932 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -8,6 +8,7 @@
 
 #include "AMDKernelCodeT.h"
 #include "MCTargetDesc/AMDGPUMCExpr.h"
+#include "MCTargetDesc/AMDGPUMCKernelCodeT.h"
 #include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
@@ -1340,7 +1341,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
   bool ParseDirectiveAMDGCNTarget();
   bool ParseDirectiveAMDHSACodeObjectVersion();
   bool ParseDirectiveAMDHSAKernel();
-  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
+  bool ParseAMDKernelCodeTValue(StringRef ID, MCKernelCodeT &Header);
   bool ParseDirectiveAMDKernelCodeT();
   // TODO: Possibly make subtargetHasRegister const.
   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
@@ -5872,8 +5873,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
   return false;
 }
 
-bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
-                                               amd_kernel_code_t &Header) {
+bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, MCKernelCodeT &C) {
   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
   // assembly for backwards compatibility.
   if (ID == "max_scratch_backing_memory_byte_size") {
@@ -5883,25 +5883,14 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
 
   SmallString<40> ErrStr;
   raw_svector_ostream Err(ErrStr);
-  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
+  if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
     return TokError(Err.str());
   }
   Lex();
 
-  if (ID == "enable_dx10_clamp") {
-    if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) &&
-        isGFX12Plus())
-      return TokError("enable_dx10_clamp=1 is not allowed on GFX12+");
-  }
-
-  if (ID == "enable_ieee_mode") {
-    if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) &&
-        isGFX12Plus())
-      return TokError("enable_ieee_mode=1 is not allowed on GFX12+");
-  }
-
   if (ID == "enable_wavefront_size32") {
-    if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
+    if (C.KernelCode.code_properties &
+        AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
       if (!isGFX10Plus())
         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
@@ -5913,41 +5902,23 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
   }
 
   if (ID == "wavefront_size") {
-    if (Header.wavefront_size == 5) {
+    if (C.KernelCode.wavefront_size == 5) {
       if (!isGFX10Plus())
         return TokError("wavefront_size=5 is only allowed on GFX10+");
       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
         return TokError("wavefront_size=5 requires +WavefrontSize32");
-    } else if (Header.wavefront_size == 6) {
+    } else if (C.KernelCode.wavefront_size == 6) {
       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
         return TokError("wavefront_size=6 requires +WavefrontSize64");
     }
   }
 
-  if (ID == "enable_wgp_mode") {
-    if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
-        !isGFX10Plus())
-      return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
-  }
-
-  if (ID == "enable_mem_ordered") {
-    if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
-        !isGFX10Plus())
-      return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
-  }
-
-  if (ID == "enable_fwd_progress") {
-    if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
-        !isGFX10Plus())
-      return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
-  }
-
   return false;
 }
 
 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
-  amd_kernel_code_t Header;
-  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
+  MCKernelCodeT KernelCode;
+  KernelCode.initDefault(&getSTI(), getContext());
 
   while (true) {
     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
@@ -5961,11 +5932,12 @@ bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
     if (ID == ".end_amd_kernel_code_t")
       break;
 
-    if (ParseAMDKernelCodeTValue(ID, Header))
+    if (ParseAMDKernelCodeTValue(ID, KernelCode))
       return true;
   }
 
-  getTargetStreamer().EmitAMDKernelCodeT(Header);
+  KernelCode.validate(&getSTI(), getContext());
+  getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
 
   return false;
 }
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
new file mode 100644
index 0000000000000..7c081d98dadbf
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
@@ -0,0 +1,549 @@
+//===--- AMDGPUMCKernelCodeT.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCKernelCodeT.h"
+#include "AMDKernelCodeT.h"
+#include "SIDefines.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::AMDGPU;
+
+// Generates the following for MCKernelCodeT struct members:
+//   - HasMemberXXXXX class
+//     A check to see if MCKernelCodeT has a specific member so it can determine
+//     which of the original amd_kernel_code_t members are duplicated (if the
+//     names don't match, the table driven strategy won't work).
+//   - GetMemberXXXXX class
+//     A retrieval helper for said member (of type const MCExpr *&). When the
+//     member does not exist, returns a static `Phony` const MCExpr * (nullptr)
+//     so that a reference can still be returned.
+#define GEN_HAS_MEMBER(member)                                                 \
+  class HasMember##member {                                                    \
+  private:                                                                     \
+    struct KnownWithMember {                                                   \
+      int member;                                                              \
+    };                                                                         \
+    class AmbiguousDerived : public MCKernelCodeT, public KnownWithMember {};  \
+    template <typename U>                                                      \
+    static constexpr std::false_type Test(decltype(U::member) *);              \
+    template <typename U> static constexpr std::true_type Test(...);           \
+                                                                               \
+  public:                                                                      \
+    static constexpr bool RESULT =                                             \
+        std::is_same_v<decltype(Test<AmbiguousDerived>(nullptr)),              \
+                       std::true_type>;                                        \
+  };                                                                           \
+  class GetMember##member {                                                    \
+  public:                                                                      \
+    static const MCExpr *Phony;                                                \
+    template <typename U, typename std::enable_if_t<HasMember##member::RESULT, \
+                                                    U> * = nullptr>            \
+    static const MCExpr *&Get(U &C) {                                          \
+      assert(HasMember##member::RESULT &&                                      \
+             "Trying to retrieve member that does not exist.");                \
+      return C.member;                                                         \
+    }                                                                          \
+    template <typename U, typename std::enable_if_t<                           \
+                              !HasMember##member::RESULT, U> * = nullptr>      \
+    static const MCExpr *&Get(U &C) {                                          \
+      return Phony;                                                            \
+    }                                                                          \
+  };                                                                           \
+  const MCExpr *GetMember##member::Phony = nullptr;
+
+// Cannot generate class declarations using the table driven approach (see table
+// in AMDKernelCodeTInfo.h). Luckily, if any are missing here or eventually
+// added to the table, an error should occur when trying to retrieve the table
+// in getMCExprIndexTable.
+GEN_HAS_MEMBER(amd_code_version_major)
+GEN_HAS_MEMBER(amd_code_version_minor)
+GEN_HAS_MEMBER(amd_machine_kind)
+GEN_HAS_MEMBER(amd_machine_version_major)
+GEN_HAS_MEMBER(amd_machine_version_minor)
+GEN_HAS_MEMBER(amd_machine_version_stepping)
+
+GEN_HAS_MEMBER(kernel_code_entry_byte_offset)
+GEN_HAS_MEMBER(kernel_code_prefetch_byte_size)
+
+GEN_HAS_MEMBER(granulated_workitem_vgpr_count)
+GEN_HAS_MEMBER(granulated_wavefront_sgpr_count)
+GEN_HAS_MEMBER(priority)
+GEN_HAS_MEMBER(float_mode)
+GEN_HAS_MEMBER(priv)
+GEN_HAS_MEMBER(enable_dx10_clamp)
+GEN_HAS_MEMBER(debug_mode)
+GEN_HAS_MEMBER(enable_ieee_mode)
+GEN_HAS_MEMBER(enable_wgp_mode)
+GEN_HAS_MEMBER(enable_mem_ordered)
+GEN_HAS_MEMBER(enable_fwd_progress)
+
+GEN_HAS_MEMBER(enable_sgpr_private_segment_wave_byte_offset)
+GEN_HAS_MEMBER(user_sgpr_count)
+GEN_HAS_MEMBER(enable_trap_handler)
+GEN_HAS_MEMBER(enable_sgpr_workgroup_id_x)
+GEN_HAS_MEMBER(enable_sgpr_workgroup_id_y)
+GEN_HAS_MEMBER(enable_sgpr_workgroup_id_z)
+GEN_HAS_MEMBER(enable_sgpr_workgroup_info)
+GEN_HAS_MEMBER(enable_vgpr_workitem_id)
+GEN_HAS_MEMBER(enable_exception_msb)
+GEN_HAS_MEMBER(granulated_lds_size)
+GEN_HAS_MEMBER(enable_exception)
+
+GEN_HAS_MEMBER(enable_sgpr_private_segment_buffer)
+GEN_HAS_MEMBER(enable_sgpr_dispatch_ptr)
+GEN_HAS_MEMBER(enable_sgpr_queue_ptr)
+GEN_HAS_MEMBER(enable_sgpr_kernarg_segment_ptr)
+GEN_HAS_MEMBER(enable_sgpr_dispatch_id)
+GEN_HAS_MEMBER(enable_sgpr_flat_scratch_init)
+GEN_HAS_MEMBER(enable_sgpr_private_segment_size)
+GEN_HAS_MEMBER(enable_sgpr_grid_workgroup_count_x)
+GEN_HAS_MEMBER(enable_sgpr_grid_workgroup_count_y)
+GEN_HAS_MEMBER(enable_sgpr_grid_workgroup_count_z)
+GEN_HAS_MEMBER(enable_wavefront_size32)
+GEN_HAS_MEMBER(enable_ordered_append_gds)
+GEN_HAS_MEMBER(private_element_size)
+GEN_HAS_MEMBER(is_ptr64)
+GEN_HAS_MEMBER(is_dynamic_callstack)
+GEN_HAS_MEMBER(is_debug_enabled)
+GEN_HAS_MEMBER(is_xnack_enabled)
+
+GEN_HAS_MEMBER(workitem_private_segment_byte_size)
+GEN_HAS_MEMBER(workgroup_group_segment_byte_size)
+GEN_HAS_MEMBER(gds_segment_byte_size)
+GEN_HAS_MEMBER(kernarg_segment_byte_size)
+GEN_HAS_MEMBER(workgroup_fbarrier_count)
+GEN_HAS_MEMBER(wavefront_sgpr_count)
+GEN_HAS_MEMBER(workitem_vgpr_count)
+GEN_HAS_MEMBER(reserved_vgpr_first)
+GEN_HAS_MEMBER(reserved_vgpr_count)
+GEN_HAS_MEMBER(reserved_sgpr_first)
+GEN_HAS_MEMBER(reserved_sgpr_count)
+GEN_HAS_MEMBER(debug_wavefront_private_segment_offset_sgpr)
+GEN_HAS_MEMBER(debug_private_segment_buffer_sgpr)
+GEN_HAS_MEMBER(kernarg_segment_alignment)
+GEN_HAS_MEMBER(group_segment_alignment)
+GEN_HAS_MEMBER(private_segment_alignment)
+GEN_HAS_MEMBER(wavefront_size)
+GEN_HAS_MEMBER(call_convention)
+GEN_HAS_MEMBER(runtime_loader_kernel_symbol)
+
+static ArrayRef<StringRef> get_amd_kernel_code_t_FldNames() {
+  static StringRef const Table[] = {
+    "", // not found placeholder
+#define RECORD(name, altName, print, parse) #name
+#include "Utils/AMDKernelCodeTInfo.h"
+#undef RECORD
+  };
+  return ArrayRef(Table);
+}
+
+static ArrayRef<StringRef> get_amd_kernel_code_t_FldAltNames() {
+  static StringRef const Table[] = {
+    "", // not found placeholder
+#define RECORD(name, altName, print, parse) #altName
+#include "Utils/AMDKernelCodeTInfo.h"
+#undef RECORD
+  };
+  return ArrayRef(Table);
+}
+
+static ArrayRef<bool> hasMCExprVersionTable() {
+  static bool const Table[] = {
+#define RECORD(name, altName, print, parse) (HasMember##name::RESULT)
+#include "Utils/AMDKernelCodeTInfo.h"
+#undef RECORD
+  };
+  return ArrayRef(Table);
+}
+
+static ArrayRef<std::reference_wrapper<const MCExpr *>>
+getMCExprIndexTable(MCKernelCodeT &C) {
+  static std::reference_wrapper<const MCExpr *> Table[] = {
+#define RECORD(name, altName, print, parse) GetMember##name::Get(C)
+#include "Utils/AMDKernelCodeTInfo.h"
+#undef RECORD
+  };
+  return ArrayRef(Table);
+}
+
+static StringMap<int> createIndexMap(const ArrayRef<StringRef> &names,
+                                     const ArrayRef<StringRef> &altNames) {
+  StringMap<int> map;
+  assert(names.size() == altNames.size());
+  for (unsigned i = 0; i < names.size(); ++i) {
+    map.insert(std::pair(names[i], i));
+    map.insert(std::pair(altNames[i], i));
+  }
+  return map;
+}
+
+static int get_amd_kernel_code_t_FieldIndex(StringRef name) {
+  static const auto map = createIndexMap(get_amd_kernel_code_t_FldNames(),
+                                         get_amd_kernel_code_t_FldAltNames());
+  return map.lookup(name) - 1; // returns -1 if not found
+}
+
+static constexpr std::pair<unsigned, unsigned> getShiftMask(unsigned Value) {
+  unsigned Shift = 0;
+  unsigned Mask = 0;
+
+  Mask = ~Value;
+  for (; !(Mask & 1); Shift++, Mask >>= 1) {
+  }
+
+  return std::make_pair(Shift, Mask);
+}
+
+static const MCExpr *MaskShiftSet(const MCExpr *Val, uint32_t Mask,
+                                  uint32_t Shift, MCContext &Ctx) {
+  if (Mask) {
+    const MCExpr *MaskExpr = MCConstantExpr::create(Mask, Ctx);
+    Val = MCBinaryExpr::createAnd(Val, MaskExpr, Ctx);
+  }
+  if (Shift) {
+    const MCExpr *ShiftExpr = MCConstantExpr::create(Shift, Ctx);
+    Val = MCBinaryExpr::createShl(Val, ShiftExpr, Ctx);
+  }
+  return Val;
+}
+
+static const MCExpr *MaskShiftGet(const MCExpr *Val, uint32_t Mask,
+                                  uint32_t Shift, MCContext &Ctx) {
+  if (Shift) {
+    const MCExpr *ShiftExpr = MCConstantExpr::create(Shift, Ctx);
+    Val = MCBinaryExpr::createLShr(Val, ShiftExpr, Ctx);
+  }
+  if (Mask) {
+    const MCExpr *MaskExpr = MCConstantExpr::create(Mask, Ctx);
+    Val = MCBinaryExpr::createAnd(Val, MaskExpr, Ctx);
+  }
+  return Val;
+}
+
+template <typename T, T amd_kernel_code_t::*ptr>
+static void printField(StringRef Name, const MCKernelCodeT &C, raw_ostream &OS,
+                       MCContext &Ctx) {
+  (void)Ctx;
+  OS << Name << " = ";
+  OS << (int)(C.KernelCode.*ptr);
+}
+
+template <typename T, T amd_kernel_code_t::*ptr, int shift, int width = 1>
+static void printBitField(StringRef Name, const MCKernelCodeT &C,
+                          raw_ostream &OS, MCContext &Ctx) {
+  (void)Ctx;
+  const auto Mask = (static_cast<T>(1) << width) - 1;
+  OS << Name << " = ";
+  OS << (int)((C.KernelCode.*ptr >> shift) & Mask);
+}
+
+using PrintFx = void (*)(StringRef, const MCKernelCodeT &, raw_ostream &,
+                         MCContext &);
+
+static ArrayRef<PrintFx> getPrinterTable() {
+  static const PrintFx Table[] = {
+#define COMPPGM1(name, aname, AccMacro)                                        \
+  COMPPGM(name, aname, C_00B848_##AccMacro, S_00B848_##AccMacro, 0)
+#define COMPPGM2(name, aname, AccMacro)                                        \
+  COMPPGM(name, aname, C_00B84C_##AccMacro, S_00B84C_##AccMacro, 32)
+#define PRINTCOMP(Complement, PGMType)                                         \
+  [](StringRef Name, const MCKernelCodeT &C, raw_ostream &OS,                  \
+     MCContext &Ctx) {                                                         \
+    OS << Name << " = ";                                                       \
+    auto [Shift, Mask] = getShiftMask(Complement);                             \
+    const MCExpr *Value;                                                       \
+    if (PGMType == 0) {                                                        \
+      Value =                                                                  \
+          MaskShiftGet(C.compute_pgm_resource1_registers, Mask, Shift, Ctx);   \
+    } else {                                                                   \
+      Value =                                                                  \
+          MaskShiftGet(C.compute_pgm_resource2_registers, Mask, Shift, Ctx);   \
+    }                                                                          \
+    int64_t Val;                                                               \
+    if (Value->evaluateAsAbsolute(Val))                                        \
+      OS << Val;                                                               \
+    else                                                                       \
+      Value->print(OS, Ctx.getAsmInfo());                                      \
+  }
+#define RECORD(name, altName, print, parse) print
+#include "Utils/AMDKernelCodeTInfo.h"
+#undef RECORD
+  };
+  return ArrayRef(Table);
+}
+
+static bool expectAbsExpression(MCAsmParser &MCParser, int64_t &Value,
+                                raw_ostream &Err) {
+
+  if (MCParser.getLexer().isNot(AsmToken::Equal)) {
+    Err << "expected '='";
+    return false;
+  }
+  MCParser.getLexer().Lex();
+
+  if (MCParser.parseAbsoluteExpression(Value)) {
+    Err << "integer absolute expression expected";
+    return false;
+  }
+  return true;
+}
+
+template <typename T, T amd_kernel_code_t::*ptr>
+static bool parseField(MCKernelCodeT &C, MCAsmParser &MCParser,
+                       raw_ostream &Err) {
+  int64_t Value = 0;
+  if (!expectAbsExpression(MCParser, Value, Err))
+    return false;
+  C.KernelCode.*ptr = (T)Value;
+  return true;
+}
+
+template <typename T, T amd_kernel_code_t::*ptr, int shift, int width = 1>
+static bool parseBitField(MCKernelCodeT &C, MCAsmParser &MCParser,
+                          raw_ostream &Err) {
+  int64_t Value = 0;
+  if (!expectAbsExpression(MCParser, Value, Err))
+    return false;
+  const uint64_t Mask = ((UINT64_C(1) << width) - 1) << shift;
+  C.KernelCode.*ptr &= (T)~Mask;
+  C.KernelCode.*ptr |= (T)((Value << shift) & Mask);
+  return true;
+}
+
+static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value,
+                      raw_ostream &Err) {
+  if (MCParser.getLexer().isNot(AsmToken::Equal)) {
+    Err << "expected '='";
+    return false;
+  }
+  MCParser.getLexer().Lex();
+
+  if (MCParser.parseExpression(Value)) {
+    Err << "Could not parse expression";
+    return false;
+  }
+  return true;
+}
+
+using ParseFx = bool (*)(MCKernelCodeT &, MCAsmParser &, raw_ostream &);
+
+static ArrayRef<ParseFx> getParserTable() {
+  static const ParseFx Table[] = {
+#define COMPPGM1(name, aname, AccMacro)                                        \
+  COMPPGM(name, aname, G_00B848_##AccMacro, C_00B848_##AccMacro, 0)
+#define COMPPGM2(name, aname, AccMacro)                                        \
+  COMPPGM(name, aname, G_00B84C_##AccMacro, C_00B84C_##AccMacro, 32)
+#define PARSECOMP(Complement, PGMType)                                         \
+  [](MCKernelCodeT &C, MCAsmParser &MCParser, raw_ostream &Err) -> bool {      \
+    MCContext &Ctx = MCParser.getContext();                                    \
+    const MCExpr *Value;                                                       \
+    if (!parseExpr(MCParser, Value, Err))                                      \
+      return false;                                                            \
+    auto [Shift, Mask] = getShiftMask(Complement);                             \
+    Value = MaskShiftSet(Value, Mask, Shift, Ctx);                             \
+    const MCExpr *Compl = MCConstantExpr::create(Complement, Ctx);             \
+    if (PGMType == 0) {                                                        \
+      C.compute_pgm_resource1_registers = MCBinaryExpr::createAnd(             \
+          C.compute_pgm_resource1_registers, Compl, Ctx);                      \
+      C.compute_pgm_resource1_registers = MCBinaryExpr::createOr(              \
+          C.compute_pgm_resource1_registers, Value, Ctx);                      \
+    } else {                                                                   \
+      C.compute_pgm_resource2_registers = MCBinaryExpr::createAnd(             \
+          C.compute_pgm_resource2_registers, Compl, Ctx);                      \
+      C.compute_pgm_resource2_registers = MCBinaryExpr::createOr(              \
+          C.compute_pgm_resource2_registers, Value, Ctx);                      \
+    }                                                                          \
+    return true;                                                               \
+  }
+#define RECORD(name, altName, print, parse) parse
+#include "Utils/AMDKernelCodeTInfo.h"
+#undef RECORD
+  };
+  return ArrayRef(Table);
+}
+
+static void printAmdKernelCodeField(const MCKernelCodeT &C, int FldIndex,
+                                    raw_ostream &OS, MCContext &Ctx) {
+  auto Printer = getPrinterTable()[FldIndex];
+  if (Printer)
+    Printer(get_amd_kernel_code_t_FldNames()[FldIndex + 1], C, OS, Ctx);
+}
+
+void MCKernelCodeT::initDefault(const MCSubtargetInfo *STI, MCContext &Ctx) {
+  AMDGPU::initDefaultAMDKernelCodeT(KernelCode, STI);
+  const MCExpr *ZeroExpr = MCConstantExpr::create(0, Ctx);
+  compute_pgm_resource1_registers = MCConstantExpr::create(
+      KernelCode.compute_pgm_resource_registers & 0xFFFFFFFF, Ctx);
+  compute_pgm_resource2_registers = MCConstantExpr::create(
+      (KernelCode.compute_pgm_resource_registers >> 32) & 0xffffffff, Ctx);
+  is_dynamic_callstack = ZeroExpr;
+  wavefront_sgpr_count = ZeroExpr;
+  workitem_vgpr_count = ZeroExpr;
+  workitem_private_segment_byte_size = ZeroExpr;
+}
+
+void MCKernelCodeT::validate(const MCSubtargetInfo *STI, MCContext &Ctx) {
+  int64_t Value;
+  if (!compute_pgm_resource1_registers->evaluateAsAbsolute(Value))
+    return;
+
+  if (G_00B848_DX10_CLAMP(Value) && AMDGPU::isGFX12Plus(*STI)) {
+    Ctx.reportError({}, "enable_dx10_clamp=1 is not allowed on GFX12+");
+    return;
+  }
+
+  if (G_00B848_IEEE_MODE(Value) && AMDGPU::isGFX12Plus(*STI)) {
+    Ctx.reportError({}, "enable_ieee_mode=1 is not allowed on GFX12+");
+    return;
+  }
+
+  if (G_00B848_WGP_MODE(Value) && !AMDGPU::isGFX10Plus(*STI)) {
+    Ctx.reportError({}, "enable_wgp_mode=1 is only allowed on GFX10+");
+    return;
+  }
+
+  if (G_00B848_MEM_ORDERED(Value) && !AMDGPU::isGFX10Plus(*STI)) {
+    Ctx.reportError({}, "enable_mem_ordered=1 is only allowed on GFX10+");
+    return;
+  }
+
+  if (G_00B848_FWD_PROGRESS(Value) && !AMDGPU::isGFX10Plus(*STI)) {
+    Ctx.reportError({}, "enable_fwd_progress=1 is only allowed on GFX10+");
+    return;
+  }
+}
+
+const MCExpr *&MCKernelCodeT::getMCExprForIndex(int Index) {
+  auto IndexTable = getMCExprIndexTable(*this);
+  return IndexTable[Index].get();
+}
+
+bool MCKernelCodeT::ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser, // Parses the value of field ID; true on success.
+                                     raw_ostream &Err) { // Err receives the unknown-field message.
+  const int Idx = get_amd_kernel_code_t_FieldIndex(ID);
+  if (Idx < 0) { // A negative index is treated as an unknown field name.
+    Err << "unexpected amd_kernel_code_t field name " << ID;
+    return false;
+  }
+
+  if (hasMCExprVersionTable()[Idx]) { // Field has an MCExpr-typed member: keep the expression unevaluated.
+    const MCExpr *Value;
+    if (!parseExpr(MCParser, Value, Err))
+      return false;
+    getMCExprForIndex(Idx) = Value; // Store through the reference to the matching member.
+    return true;
+  }
+  auto Parser = getParserTable()[Idx]; // Otherwise use the table-driven field parser.
+  return Parser ? Parser(*this, MCParser, Err) : false; // A null table entry counts as failure.
+}
+
+void MCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, const char *tab, // Prints one "name = value" line per table field,
+                                    MCContext &Ctx) { // each prefixed with tab.
+  const int Size = hasMCExprVersionTable().size();
+  for (int i = 0; i < Size; ++i) {
+    OS << tab;
+    if (hasMCExprVersionTable()[i]) { // Field is stored as an MCExpr member.
+      OS << get_amd_kernel_code_t_FldNames()[i + 1] << " = "; // +1 skips the "not found" placeholder at index 0.
+      int64_t Val;
+      const MCExpr *Value = getMCExprForIndex(i);
+      if (Value->evaluateAsAbsolute(Val)) // Print the number when the expression folds to a constant,
+        OS << Val;
+      else // otherwise print the expression itself.
+        Value->print(OS, Ctx.getAsmInfo());
+    } else {
+      printAmdKernelCodeField(*this, i, OS, Ctx); // Plain field: use the printer table.
+    }
+    OS << '\n';
+  }
+}
+
+void MCKernelCodeT::EmitKernelCodeT(MCStreamer &OS, MCContext &Ctx) { // Streams the header one field at a time, in the order below.
+  OS.emitIntValue(KernelCode.amd_kernel_code_version_major, /*Size=*/4); // Plain fields are emitted as integers of the stated size.
+  OS.emitIntValue(KernelCode.amd_kernel_code_version_minor, /*Size=*/4);
+  OS.emitIntValue(KernelCode.amd_machine_kind, /*Size=*/2);
+  OS.emitIntValue(KernelCode.amd_machine_version_major, /*Size=*/2);
+  OS.emitIntValue(KernelCode.amd_machine_version_minor, /*Size=*/2);
+  OS.emitIntValue(KernelCode.amd_machine_version_stepping, /*Size=*/2);
+  OS.emitIntValue(KernelCode.kernel_code_entry_byte_offset, /*Size=*/8);
+  OS.emitIntValue(KernelCode.kernel_code_prefetch_byte_offset, /*Size=*/8);
+  OS.emitIntValue(KernelCode.kernel_code_prefetch_byte_size, /*Size=*/8);
+  OS.emitIntValue(KernelCode.reserved0, /*Size=*/8);
+
+  if (compute_pgm_resource1_registers != nullptr) // MCExpr-backed fields: emit the expression when one is set,
+    OS.emitValue(compute_pgm_resource1_registers, /*Size=*/4);
+  else // otherwise fall back to the raw struct value (low 32 bits here).
+    OS.emitIntValue(KernelCode.compute_pgm_resource_registers & 0xFFFFFFFF,
+                    /*Size=*/4);
+
+  if (compute_pgm_resource2_registers != nullptr)
+    OS.emitValue(compute_pgm_resource2_registers, /*Size=*/4);
+  else // Fallback: high 32 bits of the raw 64-bit value.
+    OS.emitIntValue((KernelCode.compute_pgm_resource_registers >> 32) &
+                        0xFFFFFFFF,
+                    /*Size=*/4);
+
+  if (is_dynamic_callstack != nullptr) { // Fold the MCExpr-valued bit field into code_properties.
+    const MCExpr *CodeProps =
+        MCConstantExpr::create(KernelCode.code_properties, Ctx);
+    CodeProps = MCBinaryExpr::createOr( // code_properties | MaskShiftSet(is_dynamic_callstack, mask, shift)
+        CodeProps,
+        MaskShiftSet(is_dynamic_callstack,
+                     (1 << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH) - 1, // Mask with WIDTH low bits set.
+                     AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT, Ctx),
+        Ctx);
+    OS.emitValue(CodeProps, /*Size=*/4);
+  } else // No expression: code_properties is emitted unchanged.
+    OS.emitIntValue(KernelCode.code_properties, /*Size=*/4);
+
+  if (workitem_private_segment_byte_size != nullptr)
+    OS.emitValue(workitem_private_segment_byte_size, /*Size=*/4);
+  else
+    OS.emitIntValue(KernelCode.workitem_private_segment_byte_size, /*Size=*/4);
+
+  OS.emitIntValue(KernelCode.workgroup_group_segment_byte_size, /*Size=*/4);
+  OS.emitIntValue(KernelCode.gds_segment_byte_size, /*Size=*/4);
+  OS.emitIntValue(KernelCode.kernarg_segment_byte_size, /*Size=*/8);
+  OS.emitIntValue(KernelCode.workgroup_fbarrier_count, /*Size=*/4);
+
+  if (wavefront_sgpr_count != nullptr)
+    OS.emitValue(wavefront_sgpr_count, /*Size=*/2);
+  else
+    OS.emitIntValue(KernelCode.wavefront_sgpr_count, /*Size=*/2);
+
+  if (workitem_vgpr_count != nullptr)
+    OS.emitValue(workitem_vgpr_count, /*Size=*/2);
+  else
+    OS.emitIntValue(KernelCode.workitem_vgpr_count, /*Size=*/2);
+
+  OS.emitIntValue(KernelCode.reserved_vgpr_first, /*Size=*/2);
+  OS.emitIntValue(KernelCode.reserved_vgpr_count, /*Size=*/2);
+  OS.emitIntValue(KernelCode.reserved_sgpr_first, /*Size=*/2);
+  OS.emitIntValue(KernelCode.reserved_sgpr_count, /*Size=*/2);
+  OS.emitIntValue(KernelCode.debug_wavefront_private_segment_offset_sgpr,
+                  /*Size=*/2);
+  OS.emitIntValue(KernelCode.debug_private_segment_buffer_sgpr, /*Size=*/2);
+  OS.emitIntValue(KernelCode.kernarg_segment_alignment, /*Size=*/1);
+  OS.emitIntValue(KernelCode.group_segment_alignment, /*Size=*/1);
+  OS.emitIntValue(KernelCode.private_segment_alignment, /*Size=*/1);
+  OS.emitIntValue(KernelCode.wavefront_size, /*Size=*/1);
+
+  OS.emitIntValue(KernelCode.call_convention, /*Size=*/4);
+  OS.emitBytes(StringRef((const char *)KernelCode.reserved3, /*Size=*/12)); // Array members are copied as raw bytes.
+  OS.emitIntValue(KernelCode.runtime_loader_kernel_symbol, /*Size=*/8);
+  OS.emitBytes(
+      StringRef((const char *)KernelCode.control_directives, /*Size=*/16 * 8)); // 128 bytes of control directives.
+}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h
new file mode 100644
index 0000000000000..66c5d1107487b
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h
@@ -0,0 +1,59 @@
+//===--- AMDGPUMCKernelCodeT.h --------------------------------*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// MC layer struct for amd_kernel_code_t, provides MCExpr functionality where
+/// required.
+///
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCKERNELCODET_H
+#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCKERNELCODET_H
+
+#include "AMDKernelCodeT.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/ArrayRef.h"
+
+namespace llvm {
+class MCAsmParser;
+class MCContext;
+class MCExpr;
+class MCStreamer;
+class MCSubtargetInfo;
+class raw_ostream;
+namespace AMDGPU {
+
+struct MCKernelCodeT { // amd_kernel_code_t plus MCExpr-valued copies of selected fields.
+  MCKernelCodeT() = default;
+
+  amd_kernel_code_t KernelCode; // Plain-integer fields of the header.
+  const MCExpr *compute_pgm_resource1_registers = nullptr; // Low 32 bits of compute_pgm_resource_registers.
+  const MCExpr *compute_pgm_resource2_registers = nullptr; // High 32 bits of compute_pgm_resource_registers.
+
+  // Fields duplicated from amd_kernel_code_t, but held as MCExpr instead.
+  // Names have to match the ones used in AMDKernelCodeTInfo.h.
+  const MCExpr *is_dynamic_callstack = nullptr;
+  const MCExpr *wavefront_sgpr_count = nullptr;
+  const MCExpr *workitem_vgpr_count = nullptr;
+  const MCExpr *workitem_private_segment_byte_size = nullptr;
+
+  void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx); // Sets KernelCode defaults and constant MCExprs.
+  void validate(const MCSubtargetInfo *STI, MCContext &Ctx); // Reports disallowed rsrc1 bits through Ctx.
+
+  const MCExpr *&getMCExprForIndex(int Index); // Writable MCExpr slot for a field-table index.
+
+  bool ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser, raw_ostream &Err); // Parses one field; true on success.
+  void EmitKernelCodeT(raw_ostream &OS, const char *tab, MCContext &Ctx); // Textual "name = value" form.
+  void EmitKernelCodeT(MCStreamer &OS, MCContext &Ctx); // Field-by-field emission through the streamer.
+};
+
+} // end namespace AMDGPU
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCKERNELCODET_H
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 02fe7be06280e..e58409e8b0cbc 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -11,9 +11,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPUTargetStreamer.h"
+#include "AMDGPUMCKernelCodeT.h"
 #include "AMDGPUMCKernelDescriptor.h"
 #include "AMDGPUPTNote.h"
-#include "AMDKernelCodeT.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "Utils/AMDKernelCodeTUtils.h"
 #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
@@ -240,10 +240,9 @@ void AMDGPUTargetAsmStreamer::EmitDirectiveAMDHSACodeObjectVersion(
   OS << "\t.amdhsa_code_object_version " << COV << '\n';
 }
 
-void
-AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
+void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(MCKernelCodeT &Header) {
   OS << "\t.amd_kernel_code_t\n";
-  dumpAmdKernelCode(&Header, OS, "\t\t");
+  Header.EmitKernelCodeT(OS, "\t\t", getContext());
   OS << "\t.end_amd_kernel_code_t\n";
 }
 
@@ -789,12 +788,10 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
 
 void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {}
 
-void
-AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
-
+void AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(MCKernelCodeT &Header) {
   MCStreamer &OS = getStreamer();
   OS.pushSection();
-  OS.emitBytes(StringRef((const char*)&Header, sizeof(Header)));
+  Header.EmitKernelCodeT(OS, getContext());
   OS.popSection();
 }
 
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index 706897a5dc1f4..ea5d1d379f785 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -13,8 +13,6 @@
 #include "Utils/AMDGPUPALMetadata.h"
 #include "llvm/MC/MCStreamer.h"
 
-struct amd_kernel_code_t;
-
 namespace llvm {
 
 class MCELFStreamer;
@@ -23,6 +21,7 @@ class formatted_raw_ostream;
 
 namespace AMDGPU {
 
+struct MCKernelCodeT;
 struct MCKernelDescriptor;
 namespace HSAMD {
 struct Metadata;
@@ -54,7 +53,7 @@ class AMDGPUTargetStreamer : public MCTargetStreamer {
     CodeObjectVersion = COV;
   }
 
-  virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header){};
+  virtual void EmitAMDKernelCodeT(AMDGPU::MCKernelCodeT &Header){};
 
   virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type){};
 
@@ -130,7 +129,7 @@ class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer {
 
   void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV) override;
 
-  void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
+  void EmitAMDKernelCodeT(AMDGPU::MCKernelCodeT &Header) override;
 
   void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
 
@@ -186,7 +185,7 @@ class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
 
   void EmitDirectiveAMDGCNTarget() override;
 
-  void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
+  void EmitAMDKernelCodeT(AMDGPU::MCKernelCodeT &Header) override;
 
   void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
 
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
index 14a02b6d8e368..5ff44ee70afa6 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
@@ -8,6 +8,7 @@ add_llvm_component_library(LLVMAMDGPUDesc
   AMDGPUMCExpr.cpp
   AMDGPUMCTargetDesc.cpp
   AMDGPUTargetStreamer.cpp
+  AMDGPUMCKernelCodeT.cpp
   AMDGPUMCKernelDescriptor.cpp
   R600InstPrinter.cpp
   R600MCCodeEmitter.cpp
@@ -20,6 +21,7 @@ add_llvm_component_library(LLVMAMDGPUDesc
   CodeGenTypes
   Core
   MC
+  MCParser
   Support
   TargetParser
 
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 6d0e0b3f4de2c..1e9bfc77ab923 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -1111,7 +1111,7 @@ enum Type { TRAP = -2, WORKGROUP = -1 };
 #define   C_00B84C_LDS_SIZE                                           0xFF007FFF
 #define   S_00B84C_EXCP_EN(x)                                         (((x) & 0x7F) << 24)
 #define   G_00B84C_EXCP_EN(x)                                         (((x) >> 24) & 0x7F)
-#define   C_00B84C_EXCP_EN
+#define   C_00B84C_EXCP_EN                                            0x80FFFFFF
 
 #define R_0286CC_SPI_PS_INPUT_ENA                                       0x0286CC
 #define R_0286D0_SPI_PS_INPUT_ADDR                                      0x0286D0
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
index 95ad3f35d18f8..2a9fa804bc898 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
@@ -15,31 +15,44 @@
 #define QNAME(name) amd_kernel_code_t::name
 #define FLD_T(name) decltype(QNAME(name)), &QNAME(name)
 
+#ifndef FIELD2
 #define FIELD2(sname, aname, name) \
   RECORD(sname, aname, printField<FLD_T(name)>, parseField<FLD_T(name)>)
+#endif
 
+#ifndef FIELD
 #define FIELD(name) FIELD2(name, name, name)
+#endif
 
-
+#ifndef PRINTCODEPROP
 #define PRINTCODEPROP(name) \
   printBitField<FLD_T(code_properties),\
                 AMD_CODE_PROPERTY_##name##_SHIFT,\
                 AMD_CODE_PROPERTY_##name##_WIDTH>
+#endif
 
+#ifndef PARSECODEPROP
 #define PARSECODEPROP(name) \
   parseBitField<FLD_T(code_properties),\
                 AMD_CODE_PROPERTY_##name##_SHIFT,\
                 AMD_CODE_PROPERTY_##name##_WIDTH>
+#endif
 
+#ifndef CODEPROP
 #define CODEPROP(name, shift) \
   RECORD(name, name, PRINTCODEPROP(shift), PARSECODEPROP(shift))
+#endif
 
 // have to define these lambdas because of Set/GetMacro
+#ifndef PRINTCOMP
 #define PRINTCOMP(GetMacro, Shift) \
 [](StringRef Name, const amd_kernel_code_t &C, raw_ostream &OS) { \
    printName(OS, Name) << \
      (int)GetMacro(C.compute_pgm_resource_registers >> Shift); \
 }
+#endif
+
+#ifndef PARSECOMP
 #define PARSECOMP(SetMacro, Shift) \
 [](amd_kernel_code_t &C, MCAsmParser &MCParser, raw_ostream &Err) { \
    int64_t Value = 0; \
@@ -49,15 +62,22 @@
    C.compute_pgm_resource_registers |= SetMacro(Value) << Shift; \
    return true; \
 }
+#endif
 
+#ifndef COMPPGM
 #define COMPPGM(name, aname, GetMacro, SetMacro, Shift) \
   RECORD(name, aname, PRINTCOMP(GetMacro, Shift), PARSECOMP(SetMacro, Shift))
+#endif
 
+#ifndef COMPPGM1
 #define COMPPGM1(name, aname, AccMacro) \
   COMPPGM(name, aname, G_00B848_##AccMacro, S_00B848_##AccMacro, 0)
+#endif
 
+#ifndef COMPPGM2
 #define COMPPGM2(name, aname, AccMacro) \
   COMPPGM(name, aname, G_00B84C_##AccMacro, S_00B84C_##AccMacro, 32)
+#endif
 
 ///////////////////////////////////////////////////////////////////////////////
 // Begin of the table
@@ -149,7 +169,7 @@ FIELD(runtime_loader_kernel_symbol)
 #undef PARSECODEPROP
 #undef CODEPROP
 #undef PRINTCOMP
-#undef PAPSECOMP
+#undef PARSECOMP
 #undef COMPPGM
 #undef COMPPGM1
 #undef COMPPGM2
diff --git a/llvm/test/MC/AMDGPU/amd_kernel_code_t.s b/llvm/test/MC/AMDGPU/amd_kernel_code_t.s
new file mode 100644
index 0000000000000..052ec0bfabb84
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/amd_kernel_code_t.s
@@ -0,0 +1,171 @@
+; RUN: llvm-mc -triple=amdgcn-mesa-mesa3d -mcpu=gfx900 -filetype=asm < %s | FileCheck --check-prefix=ASM %s
+; RUN: llvm-mc -triple=amdgcn-mesa-mesa3d -mcpu=gfx900 -filetype=obj < %s > %t
+; RUN: llvm-objdump -s %t | FileCheck --check-prefix=OBJDUMP %s
+
+; OBJDUMP: Contents of section .known_is_dynamic_callstack:
+; OBJDUMP: 0030 00000000 00000000 00001000 00000000
+
+; OBJDUMP: Contents of section .known_wavefront_sgpr_count:
+; OBJDUMP: 0050 00000000 01000000 00000000 00000000
+
+; OBJDUMP: Contents of section .known_workitem_vgpr_count:
+; OBJDUMP: 0050 00000000 00000100 00000000 00000000
+
+; OBJDUMP: Contents of section .known_workitem_private_segment_byte_size:
+; OBJDUMP: 0030 00000000 00000000 00000000 01000000
+
+; OBJDUMP: Contents of section .known_granulated_workitem_vgpr_count:
+; OBJDUMP: 0030 01000000 00000000 00000000 00000000
+
+; OBJDUMP: Contents of section .known_enable_sgpr_workgroup_id_x:
+; OBJDUMP: 0030 00000000 80000000 00000000 00000000
+
+; OBJDUMP: Contents of section .unknown_is_dynamic_callstack:
+; OBJDUMP: 0030 00000000 00000000 00001000 00000000
+
+; OBJDUMP: Contents of section .unknown_wavefront_sgpr_count:
+; OBJDUMP: 0050 00000000 01000000 00000000 00000000
+
+; OBJDUMP: Contents of section .unknown_workitem_vgpr_count:
+; OBJDUMP: 0050 00000000 00000100 00000000 00000000
+
+; OBJDUMP: Contents of section .unknown_workitem_private_segment_byte_size:
+; OBJDUMP: 0030 00000000 00000000 00000000 01000000
+
+; OBJDUMP: Contents of section .unknown_granulated_workitem_vgpr_count:
+; OBJDUMP: 0030 01000000 00000000 00000000 00000000
+
+; OBJDUMP: Contents of section .unknown_enable_sgpr_workgroup_id_x:
+; OBJDUMP: 0030 00000000 80000000 00000000 00000000
+
+.set known, 1
+
+; ASM-LABEL: known_is_dynamic_callstack:
+; ASM: is_dynamic_callstack = 1
+.section .known_is_dynamic_callstack
+known_is_dynamic_callstack:
+	.amd_kernel_code_t
+		is_dynamic_callstack = known
+	.end_amd_kernel_code_t
+	s_endpgm
+
+; ASM-LABEL: known_wavefront_sgpr_count:
+; ASM: wavefront_sgpr_count = 1
+.section .known_wavefront_sgpr_count
+known_wavefront_sgpr_count:
+	.amd_kernel_code_t
+		wavefront_sgpr_count = known
+	.end_amd_kernel_code_t
+	s_endpgm
+
+; ASM-LABEL: known_workitem_vgpr_count:
+; ASM: workitem_vgpr_count = 1
+.section .known_workitem_vgpr_count
+known_workitem_vgpr_count:
+	.amd_kernel_code_t
+		workitem_vgpr_count = known
+	.end_amd_kernel_code_t
+	s_endpgm
+
+; ASM-LABEL: known_workitem_private_segment_byte_size:
+; ASM: workitem_private_segment_byte_size = 1
+.section .known_workitem_private_segment_byte_size
+known_workitem_private_segment_byte_size:
+	.amd_kernel_code_t
+		workitem_private_segment_byte_size = known
+	.end_amd_kernel_code_t
+	s_endpgm
+
+; ASM-LABEL: known_granulated_workitem_vgpr_count:
+; ASM: granulated_workitem_vgpr_count = 1
+.section .known_granulated_workitem_vgpr_count
+known_granulated_workitem_vgpr_count:
+	.amd_kernel_code_t
+		granulated_workitem_vgpr_count = known
+	.end_amd_kernel_code_t
+	s_endpgm
+
+; ASM-LABEL: known_enable_sgpr_workgroup_id_x:
+; ASM: enable_sgpr_workgroup_id_x = 1
+.section .known_enable_sgpr_workgroup_id_x
+known_enable_sgpr_workgroup_id_x:
+	.amd_kernel_code_t
+		enable_sgpr_workgroup_id_x = known
+	.end_amd_kernel_code_t
+	s_endpgm
+
+; ASM-LABEL: unknown_is_dynamic_callstack:
+; ASM: is_dynamic_callstack = unknown
+.section .unknown_is_dynamic_callstack
+unknown_is_dynamic_callstack:
+	.amd_kernel_code_t
+		is_dynamic_callstack = unknown
+	.end_amd_kernel_code_t
+	s_endpgm
+
+; ASM-LABEL: unknown_wavefront_sgpr_count:
+; ASM: wavefront_sgpr_count = unknown
+.section .unknown_wavefront_sgpr_count
+unknown_wavefront_sgpr_count:
+	.amd_kernel_code_t
+		wavefront_sgpr_count = unknown
+	.end_amd_kernel_code_t
+	s_endpgm
+
+; ASM-LABEL: unknown_workitem_vgpr_count:
+; ASM: workitem_vgpr_count = unknown
+.section .unknown_workitem_vgpr_count
+unknown_workitem_vgpr_count:
+	.amd_kernel_code_t
+		workitem_vgpr_count = unknown
+	.end_amd_kernel_code_t
+	s_endpgm
+
+; ASM-LABEL: unknown_workitem_private_segment_byte_size:
+; ASM: workitem_private_segment_byte_size = unknown
+.section .unknown_workitem_private_segment_byte_size
+unknown_workitem_private_segment_byte_size:
+	.amd_kernel_code_t
+		workitem_private_segment_byte_size = unknown
+	.end_amd_kernel_code_t
+	s_endpgm
+
+; ASM-LABEL: unknown_granulated_workitem_vgpr_count:
+; ASM: granulated_workitem_vgpr_count = ((0&4294967232)|(unknown&63))&63
+; ASM: granulated_wavefront_sgpr_count = (((0&4294967232)|(unknown&63))>>6)&15
+; ASM: priority = (((0&4294967232)|(unknown&63))>>10)&3
+; ASM: float_mode = (((0&4294967232)|(unknown&63))>>12)&255
+; ASM: priv = (((0&4294967232)|(unknown&63))>>20)&1
+; ASM: enable_dx10_clamp = (((0&4294967232)|(unknown&63))>>21)&1
+; ASM: debug_mode = (((0&4294967232)|(unknown&63))>>22)&1
+; ASM: enable_ieee_mode = (((0&4294967232)|(unknown&63))>>23)&1
+; ASM: enable_wgp_mode = (((0&4294967232)|(unknown&63))>>29)&1
+; ASM: enable_mem_ordered = (((0&4294967232)|(unknown&63))>>30)&1
+; ASM: enable_fwd_progress = (((0&4294967232)|(unknown&63))>>31)&1
+.section .unknown_granulated_workitem_vgpr_count
+unknown_granulated_workitem_vgpr_count:
+	.amd_kernel_code_t
+		granulated_workitem_vgpr_count = unknown
+	.end_amd_kernel_code_t
+	s_endpgm
+
+; ASM-LABEL: unknown_enable_sgpr_workgroup_id_x:
+; ASM: enable_sgpr_private_segment_wave_byte_offset = ((0&4294967167)|((unknown&1)<<7))&1
+; ASM: user_sgpr_count = (((0&4294967167)|((unknown&1)<<7))>>1)&31
+; ASM: enable_trap_handler = (((0&4294967167)|((unknown&1)<<7))>>6)&1
+; ASM: enable_sgpr_workgroup_id_x = (((0&4294967167)|((unknown&1)<<7))>>7)&1
+; ASM: enable_sgpr_workgroup_id_y = (((0&4294967167)|((unknown&1)<<7))>>8)&1
+; ASM: enable_sgpr_workgroup_id_z = (((0&4294967167)|((unknown&1)<<7))>>9)&1
+; ASM: enable_sgpr_workgroup_info = (((0&4294967167)|((unknown&1)<<7))>>10)&1
+; ASM: enable_vgpr_workitem_id = (((0&4294967167)|((unknown&1)<<7))>>11)&3
+; ASM: enable_exception_msb = (((0&4294967167)|((unknown&1)<<7))>>13)&3
+; ASM: granulated_lds_size = (((0&4294967167)|((unknown&1)<<7))>>15)&511
+; ASM: enable_exception = (((0&4294967167)|((unknown&1)<<7))>>24)&127
+.section .unknown_enable_sgpr_workgroup_id_x
+unknown_enable_sgpr_workgroup_id_x:
+	.amd_kernel_code_t
+		enable_sgpr_workgroup_id_x = unknown
+	.end_amd_kernel_code_t
+	s_endpgm
+
+.set unknown, 1

>From ccdeaf31408e00cd9de1f352102a517368bec733 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Thu, 9 May 2024 17:05:15 +0100
Subject: [PATCH 2/5] Formatting, rename struct, minor feedback changes

---
 llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp   | 27 +++++---
 llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h     |  5 +-
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp      |  7 +-
 .../MCTargetDesc/AMDGPUMCKernelCodeT.cpp      | 65 +++++++++----------
 .../AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h |  6 +-
 .../MCTargetDesc/AMDGPUTargetStreamer.cpp     |  4 +-
 .../MCTargetDesc/AMDGPUTargetStreamer.h       |  8 +--
 7 files changed, 65 insertions(+), 57 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 8343d3d83d22e..f4a5bd10d5579 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -205,7 +205,7 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
   if (STM.isMesaKernel(F) &&
       (F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
        F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
-    MCKernelCodeT KernelCode;
+    AMDGPUMCKernelCodeT KernelCode;
     getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);
     KernelCode.validate(&STM, MF->getContext());
     getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
@@ -1321,7 +1321,7 @@ static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
   }
 }
 
-void AMDGPUAsmPrinter::getAmdKernelCode(MCKernelCodeT &Out,
+void AMDGPUAsmPrinter::getAmdKernelCode(AMDGPUMCKernelCodeT &Out,
                                         const SIProgramInfo &CurrentProgramInfo,
                                         const MachineFunction &MF) const {
   const Function &F = MF.getFunction();
@@ -1341,7 +1341,8 @@ void AMDGPUAsmPrinter::getAmdKernelCode(MCKernelCodeT &Out,
   Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
 
   {
-    const MCExpr *Shift = MCConstantExpr::create(AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT, Ctx);
+    const MCExpr *Shift = MCConstantExpr::create(
+        AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT, Ctx);
     Out.is_dynamic_callstack = MCBinaryExpr::createShl(
         CurrentProgramInfo.DynamicCallStack, Shift, Ctx);
   }
@@ -1353,32 +1354,37 @@ void AMDGPUAsmPrinter::getAmdKernelCode(MCKernelCodeT &Out,
   const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI->getUserSGPRInfo();
   if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
     Out.KernelCode.code_properties |=
-      AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
+        AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
   }
 
   if (UserSGPRInfo.hasDispatchPtr())
-    Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+    Out.KernelCode.code_properties |=
+        AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
 
   if (UserSGPRInfo.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5)
     Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
 
   if (UserSGPRInfo.hasKernargSegmentPtr())
-    Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
+    Out.KernelCode.code_properties |=
+        AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
 
   if (UserSGPRInfo.hasDispatchID())
     Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
 
   if (UserSGPRInfo.hasFlatScratchInit())
-    Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
+    Out.KernelCode.code_properties |=
+        AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
 
   if (UserSGPRInfo.hasDispatchPtr())
-    Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+    Out.KernelCode.code_properties |=
+        AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
 
   if (STM.isXNACKEnabled())
     Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
 
   Align MaxKernArgAlign;
-  Out.KernelCode.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
+  Out.KernelCode.kernarg_segment_byte_size =
+      STM.getKernArgSegmentSize(F, MaxKernArgAlign);
   Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
   Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
   Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
@@ -1387,7 +1393,8 @@ void AMDGPUAsmPrinter::getAmdKernelCode(MCKernelCodeT &Out,
   // kernarg_segment_alignment is specified as log of the alignment.
   // The minimum alignment is 16.
   // FIXME: The metadata treats the minimum as 4?
-  Out.KernelCode.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign));
+  Out.KernelCode.kernarg_segment_alignment =
+      Log2(std::max(Align(16), MaxKernArgAlign));
 }
 
 bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index c5abbd3c8c084..87156f27fc6c5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -27,7 +27,7 @@ class MCOperand;
 
 namespace AMDGPU {
 struct MCKernelDescriptor;
-struct MCKernelCodeT;
+struct AMDGPUMCKernelCodeT;
 namespace HSAMD {
 class MetadataStreamer;
 }
@@ -49,7 +49,8 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
   uint64_t getFunctionCodeSize(const MachineFunction &MF) const;
 
   void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF);
-  void getAmdKernelCode(AMDGPU::MCKernelCodeT &Out, const SIProgramInfo &KernelInfo,
+  void getAmdKernelCode(AMDGPU::AMDGPUMCKernelCodeT &Out,
+                        const SIProgramInfo &KernelInfo,
                         const MachineFunction &MF) const;
 
   /// Emit register usage information so that the GPU driver
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index b8bdf816a9932..7de2d52a8337b 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1341,7 +1341,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
   bool ParseDirectiveAMDGCNTarget();
   bool ParseDirectiveAMDHSACodeObjectVersion();
   bool ParseDirectiveAMDHSAKernel();
-  bool ParseAMDKernelCodeTValue(StringRef ID, MCKernelCodeT &Header);
+  bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
   bool ParseDirectiveAMDKernelCodeT();
   // TODO: Possibly make subtargetHasRegister const.
   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
@@ -5873,7 +5873,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
   return false;
 }
 
-bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, MCKernelCodeT &C) {
+bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
+                                               AMDGPUMCKernelCodeT &C) {
   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
   // assembly for backwards compatibility.
   if (ID == "max_scratch_backing_memory_byte_size") {
@@ -5917,7 +5918,7 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, MCKernelCodeT &C) {
 }
 
 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
-  MCKernelCodeT KernelCode;
+  AMDGPUMCKernelCodeT KernelCode;
   KernelCode.initDefault(&getSTI(), getContext());
 
   while (true) {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
index 7c081d98dadbf..72d3bfb48b94a 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
@@ -22,11 +22,11 @@
 using namespace llvm;
 using namespace llvm::AMDGPU;
 
-// Generates the following for MCKernelCodeT struct members:
+// Generates the following for AMDGPUMCKernelCodeT struct members:
 //   - HasMemberXXXXX class
-//     A check to see if MCKernelCodeT has a specific member so it can determine
-//     which of the original amd_kernel_code_t members are duplicated (if the
-//     names don't match, the table driven strategy won't work).
+//     A check to see if AMDGPUMCKernelCodeT has a specific member so it can
+//     determine which of the original amd_kernel_code_t members are duplicated
+//     (if the names don't match, the table driven strategy won't work).
 //   - GetMemberXXXXX class
 //     A retrieval helper for said member (of type const MCExpr *&). Will return
 //     a `Phony` const MCExpr * initialized to nullptr to preserve reference
@@ -37,7 +37,8 @@ using namespace llvm::AMDGPU;
     struct KnownWithMember {                                                   \
       int member;                                                              \
     };                                                                         \
-    class AmbiguousDerived : public MCKernelCodeT, public KnownWithMember {};  \
+    class AmbiguousDerived : public AMDGPUMCKernelCodeT,                       \
+                             public KnownWithMember {};                        \
     template <typename U>                                                      \
     static constexpr std::false_type Test(decltype(U::member) *);              \
     template <typename U> static constexpr std::true_type Test(...);           \
@@ -143,7 +144,7 @@ GEN_HAS_MEMBER(runtime_loader_kernel_symbol)
 
 static ArrayRef<StringRef> get_amd_kernel_code_t_FldNames() {
   static StringRef const Table[] = {
-    "", // not found placeholder
+      "", // not found placeholder
 #define RECORD(name, altName, print, parse) #name
 #include "Utils/AMDKernelCodeTInfo.h"
 #undef RECORD
@@ -153,7 +154,7 @@ static ArrayRef<StringRef> get_amd_kernel_code_t_FldNames() {
 
 static ArrayRef<StringRef> get_amd_kernel_code_t_FldAltNames() {
   static StringRef const Table[] = {
-    "", // not found placeholder
+      "", // not found placeholder
 #define RECORD(name, altName, print, parse) #altName
 #include "Utils/AMDKernelCodeTInfo.h"
 #undef RECORD
@@ -171,7 +172,7 @@ static ArrayRef<bool> hasMCExprVersionTable() {
 }
 
 static ArrayRef<std::reference_wrapper<const MCExpr *>>
-getMCExprIndexTable(MCKernelCodeT &C) {
+getMCExprIndexTable(AMDGPUMCKernelCodeT &C) {
   static std::reference_wrapper<const MCExpr *> Table[] = {
 #define RECORD(name, altName, print, parse) GetMember##name::Get(C)
 #include "Utils/AMDKernelCodeTInfo.h"
@@ -235,23 +236,19 @@ static const MCExpr *MaskShiftGet(const MCExpr *Val, uint32_t Mask,
 }
 
 template <typename T, T amd_kernel_code_t::*ptr>
-static void printField(StringRef Name, const MCKernelCodeT &C, raw_ostream &OS,
-                       MCContext &Ctx) {
-  (void)Ctx;
-  OS << Name << " = ";
-  OS << (int)(C.KernelCode.*ptr);
+static void printField(StringRef Name, const AMDGPUMCKernelCodeT &C,
+                       raw_ostream &OS, MCContext &) {
+  OS << Name << " = " << (int)(C.KernelCode.*ptr);
 }
 
 template <typename T, T amd_kernel_code_t::*ptr, int shift, int width = 1>
-static void printBitField(StringRef Name, const MCKernelCodeT &C,
-                          raw_ostream &OS, MCContext &Ctx) {
-  (void)Ctx;
+static void printBitField(StringRef Name, const AMDGPUMCKernelCodeT &C,
+                          raw_ostream &OS, MCContext &) {
   const auto Mask = (static_cast<T>(1) << width) - 1;
-  OS << Name << " = ";
-  OS << (int)((C.KernelCode.*ptr >> shift) & Mask);
+  OS << Name << " = " << (int)((C.KernelCode.*ptr >> shift) & Mask);
 }
 
-using PrintFx = void (*)(StringRef, const MCKernelCodeT &, raw_ostream &,
+using PrintFx = void (*)(StringRef, const AMDGPUMCKernelCodeT &, raw_ostream &,
                          MCContext &);
 
 static ArrayRef<PrintFx> getPrinterTable() {
@@ -261,7 +258,7 @@ static ArrayRef<PrintFx> getPrinterTable() {
 #define COMPPGM2(name, aname, AccMacro)                                        \
   COMPPGM(name, aname, C_00B84C_##AccMacro, S_00B84C_##AccMacro, 32)
 #define PRINTCOMP(Complement, PGMType)                                         \
-  [](StringRef Name, const MCKernelCodeT &C, raw_ostream &OS,                  \
+  [](StringRef Name, const AMDGPUMCKernelCodeT &C, raw_ostream &OS,            \
      MCContext &Ctx) {                                                         \
     OS << Name << " = ";                                                       \
     auto [Shift, Mask] = getShiftMask(Complement);                             \
@@ -303,7 +300,7 @@ static bool expectAbsExpression(MCAsmParser &MCParser, int64_t &Value,
 }
 
 template <typename T, T amd_kernel_code_t::*ptr>
-static bool parseField(MCKernelCodeT &C, MCAsmParser &MCParser,
+static bool parseField(AMDGPUMCKernelCodeT &C, MCAsmParser &MCParser,
                        raw_ostream &Err) {
   int64_t Value = 0;
   if (!expectAbsExpression(MCParser, Value, Err))
@@ -313,7 +310,7 @@ static bool parseField(MCKernelCodeT &C, MCAsmParser &MCParser,
 }
 
 template <typename T, T amd_kernel_code_t::*ptr, int shift, int width = 1>
-static bool parseBitField(MCKernelCodeT &C, MCAsmParser &MCParser,
+static bool parseBitField(AMDGPUMCKernelCodeT &C, MCAsmParser &MCParser,
                           raw_ostream &Err) {
   int64_t Value = 0;
   if (!expectAbsExpression(MCParser, Value, Err))
@@ -339,7 +336,7 @@ static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value,
   return true;
 }
 
-using ParseFx = bool (*)(MCKernelCodeT &, MCAsmParser &, raw_ostream &);
+using ParseFx = bool (*)(AMDGPUMCKernelCodeT &, MCAsmParser &, raw_ostream &);
 
 static ArrayRef<ParseFx> getParserTable() {
   static const ParseFx Table[] = {
@@ -348,7 +345,8 @@ static ArrayRef<ParseFx> getParserTable() {
 #define COMPPGM2(name, aname, AccMacro)                                        \
   COMPPGM(name, aname, G_00B84C_##AccMacro, C_00B84C_##AccMacro, 32)
 #define PARSECOMP(Complement, PGMType)                                         \
-  [](MCKernelCodeT &C, MCAsmParser &MCParser, raw_ostream &Err) -> bool {      \
+  [](AMDGPUMCKernelCodeT &C, MCAsmParser &MCParser,                            \
+     raw_ostream &Err) -> bool {                                               \
     MCContext &Ctx = MCParser.getContext();                                    \
     const MCExpr *Value;                                                       \
     if (!parseExpr(MCParser, Value, Err))                                      \
@@ -376,14 +374,15 @@ static ArrayRef<ParseFx> getParserTable() {
   return ArrayRef(Table);
 }
 
-static void printAmdKernelCodeField(const MCKernelCodeT &C, int FldIndex,
+static void printAmdKernelCodeField(const AMDGPUMCKernelCodeT &C, int FldIndex,
                                     raw_ostream &OS, MCContext &Ctx) {
   auto Printer = getPrinterTable()[FldIndex];
   if (Printer)
     Printer(get_amd_kernel_code_t_FldNames()[FldIndex + 1], C, OS, Ctx);
 }
 
-void MCKernelCodeT::initDefault(const MCSubtargetInfo *STI, MCContext &Ctx) {
+void AMDGPUMCKernelCodeT::initDefault(const MCSubtargetInfo *STI,
+                                      MCContext &Ctx) {
   AMDGPU::initDefaultAMDKernelCodeT(KernelCode, STI);
   const MCExpr *ZeroExpr = MCConstantExpr::create(0, Ctx);
   compute_pgm_resource1_registers = MCConstantExpr::create(
@@ -396,7 +395,7 @@ void MCKernelCodeT::initDefault(const MCSubtargetInfo *STI, MCContext &Ctx) {
   workitem_private_segment_byte_size = ZeroExpr;
 }
 
-void MCKernelCodeT::validate(const MCSubtargetInfo *STI, MCContext &Ctx) {
+void AMDGPUMCKernelCodeT::validate(const MCSubtargetInfo *STI, MCContext &Ctx) {
   int64_t Value;
   if (!compute_pgm_resource1_registers->evaluateAsAbsolute(Value))
     return;
@@ -427,13 +426,13 @@ void MCKernelCodeT::validate(const MCSubtargetInfo *STI, MCContext &Ctx) {
   }
 }
 
-const MCExpr *&MCKernelCodeT::getMCExprForIndex(int Index) {
+const MCExpr *&AMDGPUMCKernelCodeT::getMCExprForIndex(int Index) {
   auto IndexTable = getMCExprIndexTable(*this);
   return IndexTable[Index].get();
 }
 
-bool MCKernelCodeT::ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser,
-                                     raw_ostream &Err) {
+bool AMDGPUMCKernelCodeT::ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser,
+                                           raw_ostream &Err) {
   const int Idx = get_amd_kernel_code_t_FieldIndex(ID);
   if (Idx < 0) {
     Err << "unexpected amd_kernel_code_t field name " << ID;
@@ -451,8 +450,8 @@ bool MCKernelCodeT::ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser,
   return Parser ? Parser(*this, MCParser, Err) : false;
 }
 
-void MCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, const char *tab,
-                                    MCContext &Ctx) {
+void AMDGPUMCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, const char *tab,
+                                          MCContext &Ctx) {
   const int Size = hasMCExprVersionTable().size();
   for (int i = 0; i < Size; ++i) {
     OS << tab;
@@ -471,7 +470,7 @@ void MCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, const char *tab,
   }
 }
 
-void MCKernelCodeT::EmitKernelCodeT(MCStreamer &OS, MCContext &Ctx) {
+void AMDGPUMCKernelCodeT::EmitKernelCodeT(MCStreamer &OS, MCContext &Ctx) {
   OS.emitIntValue(KernelCode.amd_kernel_code_version_major, /*Size=*/4);
   OS.emitIntValue(KernelCode.amd_kernel_code_version_minor, /*Size=*/4);
   OS.emitIntValue(KernelCode.amd_machine_kind, /*Size=*/2);
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h
index 66c5d1107487b..278b0827f07a4 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h
@@ -17,8 +17,8 @@
 #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCKERNELCODET_H
 
 #include "AMDKernelCodeT.h"
-#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
 
 namespace llvm {
 class MCAsmParser;
@@ -29,8 +29,8 @@ class MCSubtargetInfo;
 class raw_ostream;
 namespace AMDGPU {
 
-struct MCKernelCodeT {
-  MCKernelCodeT() = default;
+struct AMDGPUMCKernelCodeT {
+  AMDGPUMCKernelCodeT() = default;
 
   amd_kernel_code_t KernelCode;
   const MCExpr *compute_pgm_resource1_registers = nullptr;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index e58409e8b0cbc..efbbe93ceb3c2 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -240,7 +240,7 @@ void AMDGPUTargetAsmStreamer::EmitDirectiveAMDHSACodeObjectVersion(
   OS << "\t.amdhsa_code_object_version " << COV << '\n';
 }
 
-void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(MCKernelCodeT &Header) {
+void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
   OS << "\t.amd_kernel_code_t\n";
   Header.EmitKernelCodeT(OS, "\t\t", getContext());
   OS << "\t.end_amd_kernel_code_t\n";
@@ -788,7 +788,7 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
 
 void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {}
 
-void AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(MCKernelCodeT &Header) {
+void AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
   MCStreamer &OS = getStreamer();
   OS.pushSection();
   Header.EmitKernelCodeT(OS, getContext());
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index ea5d1d379f785..399e0a7dfd098 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -21,7 +21,7 @@ class formatted_raw_ostream;
 
 namespace AMDGPU {
 
-struct MCKernelCodeT;
+struct AMDGPUMCKernelCodeT;
 struct MCKernelDescriptor;
 namespace HSAMD {
 struct Metadata;
@@ -53,7 +53,7 @@ class AMDGPUTargetStreamer : public MCTargetStreamer {
     CodeObjectVersion = COV;
   }
 
-  virtual void EmitAMDKernelCodeT(AMDGPU::MCKernelCodeT &Header){};
+  virtual void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header){};
 
   virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type){};
 
@@ -129,7 +129,7 @@ class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer {
 
   void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV) override;
 
-  void EmitAMDKernelCodeT(AMDGPU::MCKernelCodeT &Header) override;
+  void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override;
 
   void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
 
@@ -185,7 +185,7 @@ class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
 
   void EmitDirectiveAMDGCNTarget() override;
 
-  void EmitAMDKernelCodeT(AMDGPU::MCKernelCodeT &Header) override;
+  void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override;
 
   void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
 

>From 7878171c9051d23246feaa8bbfcd083d23ff49f7 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Thu, 9 May 2024 17:33:50 +0100
Subject: [PATCH 3/5] Formatting that gets undone by local clang-format

---
 llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index 399e0a7dfd098..e5c90060cb5d0 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -53,7 +53,7 @@ class AMDGPUTargetStreamer : public MCTargetStreamer {
     CodeObjectVersion = COV;
   }
 
-  virtual void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header){};
+  virtual void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) {};
 
   virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type){};
 

>From 0761ef524557e5ffe7711f95e264475967952fae Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Mon, 13 May 2024 14:40:09 +0100
Subject: [PATCH 4/5] Feedback, remove AMDKernelCodeTUtils files as they're not
 used

---
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp      |   1 -
 .../MCTargetDesc/AMDGPUMCKernelCodeT.cpp      |  17 +-
 .../AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h |   2 +-
 .../MCTargetDesc/AMDGPUTargetStreamer.cpp     |   3 +-
 .../AMDGPU/Utils/AMDKernelCodeTUtils.cpp      | 177 ------------------
 .../Target/AMDGPU/Utils/AMDKernelCodeTUtils.h |  35 ----
 llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt   |   1 -
 7 files changed, 10 insertions(+), 226 deletions(-)
 delete mode 100644 llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
 delete mode 100644 llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h

diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 7de2d52a8337b..1e81efd0b64bd 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -18,7 +18,6 @@
 #include "TargetInfo/AMDGPUTargetInfo.h"
 #include "Utils/AMDGPUAsmUtils.h"
 #include "Utils/AMDGPUBaseInfo.h"
-#include "Utils/AMDKernelCodeTUtils.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/StringSet.h"
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
index 72d3bfb48b94a..8e1d8e6154d21 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
@@ -142,8 +142,8 @@ GEN_HAS_MEMBER(wavefront_size)
 GEN_HAS_MEMBER(call_convention)
 GEN_HAS_MEMBER(runtime_loader_kernel_symbol)
 
-static ArrayRef<StringRef> get_amd_kernel_code_t_FldNames() {
-  static StringRef const Table[] = {
+static ArrayRef<StringLiteral> get_amd_kernel_code_t_FldNames() {
+  static constexpr StringLiteral const Table[] = {
       "", // not found placeholder
 #define RECORD(name, altName, print, parse) #name
 #include "Utils/AMDKernelCodeTInfo.h"
@@ -152,8 +152,8 @@ static ArrayRef<StringRef> get_amd_kernel_code_t_FldNames() {
   return ArrayRef(Table);
 }
 
-static ArrayRef<StringRef> get_amd_kernel_code_t_FldAltNames() {
-  static StringRef const Table[] = {
+static ArrayRef<StringLiteral> get_amd_kernel_code_t_FldAltNames() {
+  static constexpr StringLiteral const Table[] = {
       "", // not found placeholder
 #define RECORD(name, altName, print, parse) #altName
 #include "Utils/AMDKernelCodeTInfo.h"
@@ -181,8 +181,8 @@ getMCExprIndexTable(AMDGPUMCKernelCodeT &C) {
   return ArrayRef(Table);
 }
 
-static StringMap<int> createIndexMap(const ArrayRef<StringRef> &names,
-                                     const ArrayRef<StringRef> &altNames) {
+static StringMap<int> createIndexMap(ArrayRef<StringLiteral> names,
+                                     ArrayRef<StringLiteral> altNames) {
   StringMap<int> map;
   assert(names.size() == altNames.size());
   for (unsigned i = 0; i < names.size(); ++i) {
@@ -450,11 +450,10 @@ bool AMDGPUMCKernelCodeT::ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser,
   return Parser ? Parser(*this, MCParser, Err) : false;
 }
 
-void AMDGPUMCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, const char *tab,
-                                          MCContext &Ctx) {
+void AMDGPUMCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, MCContext &Ctx) {
   const int Size = hasMCExprVersionTable().size();
   for (int i = 0; i < Size; ++i) {
-    OS << tab;
+    OS << "\t\t";
     if (hasMCExprVersionTable()[i]) {
       OS << get_amd_kernel_code_t_FldNames()[i + 1] << " = ";
       int64_t Val;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h
index 278b0827f07a4..f1f61f130f944 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h
@@ -49,7 +49,7 @@ struct AMDGPUMCKernelCodeT {
   const MCExpr *&getMCExprForIndex(int Index);
 
   bool ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser, raw_ostream &Err);
-  void EmitKernelCodeT(raw_ostream &OS, const char *tab, MCContext &Ctx);
+  void EmitKernelCodeT(raw_ostream &OS, MCContext &Ctx);
   void EmitKernelCodeT(MCStreamer &OS, MCContext &Ctx);
 };
 
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index efbbe93ceb3c2..f2a93c5ebaf18 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -15,7 +15,6 @@
 #include "AMDGPUMCKernelDescriptor.h"
 #include "AMDGPUPTNote.h"
 #include "Utils/AMDGPUBaseInfo.h"
-#include "Utils/AMDKernelCodeTUtils.h"
 #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCAssembler.h"
@@ -242,7 +241,7 @@ void AMDGPUTargetAsmStreamer::EmitDirectiveAMDHSACodeObjectVersion(
 
 void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
   OS << "\t.amd_kernel_code_t\n";
-  Header.EmitKernelCodeT(OS, "\t\t", getContext());
+  Header.EmitKernelCodeT(OS, getContext());
   OS << "\t.end_amd_kernel_code_t\n";
 }
 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
deleted file mode 100644
index 6bbc8c3157187..0000000000000
--- a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
+++ /dev/null
@@ -1,177 +0,0 @@
-//===- AMDKernelCodeTUtils.cpp --------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file - utility functions to parse/print amd_kernel_code_t structure
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDKernelCodeTUtils.h"
-#include "AMDKernelCodeT.h"
-#include "SIDefines.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCAsmParser.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-static ArrayRef<StringRef> get_amd_kernel_code_t_FldNames() {
-  static StringRef const Table[] = {
-    "", // not found placeholder
-#define RECORD(name, altName, print, parse) #name
-#include "AMDKernelCodeTInfo.h"
-#undef RECORD
-  };
-  return ArrayRef(Table);
-}
-
-static ArrayRef<StringRef> get_amd_kernel_code_t_FldAltNames() {
-  static StringRef const Table[] = {
-    "", // not found placeholder
-#define RECORD(name, altName, print, parse) #altName
-#include "AMDKernelCodeTInfo.h"
-#undef RECORD
-  };
-  return ArrayRef(Table);
-}
-
-static StringMap<int> createIndexMap(const ArrayRef<StringRef> &names,
-                                     const ArrayRef<StringRef> &altNames) {
-  StringMap<int> map;
-  assert(names.size() == altNames.size());
-  for (unsigned i = 0; i < names.size(); ++i) {
-    map.insert(std::pair(names[i], i));
-    map.insert(std::pair(altNames[i], i));
-  }
-  return map;
-}
-
-static int get_amd_kernel_code_t_FieldIndex(StringRef name) {
-  static const auto map = createIndexMap(get_amd_kernel_code_t_FldNames(),
-                                         get_amd_kernel_code_t_FldAltNames());
-  return map.lookup(name) - 1; // returns -1 if not found
-}
-
-static StringRef get_amd_kernel_code_t_FieldName(int index) {
-  return get_amd_kernel_code_t_FldNames()[index + 1];
-}
-
-// Field printing
-
-static raw_ostream &printName(raw_ostream &OS, StringRef Name) {
-  return OS << Name << " = ";
-}
-
-template <typename T, T amd_kernel_code_t::*ptr>
-static void printField(StringRef Name, const amd_kernel_code_t &C,
-                       raw_ostream &OS) {
-  printName(OS, Name) << (int)(C.*ptr);
-}
-
-template <typename T, T amd_kernel_code_t::*ptr, int shift, int width = 1>
-static void printBitField(StringRef Name, const amd_kernel_code_t &c,
-                          raw_ostream &OS) {
-  const auto Mask = (static_cast<T>(1) << width) - 1;
-  printName(OS, Name) << (int)((c.*ptr >> shift) & Mask);
-}
-
-using PrintFx = void(*)(StringRef, const amd_kernel_code_t &, raw_ostream &);
-
-static ArrayRef<PrintFx> getPrinterTable() {
-  static const PrintFx Table[] = {
-#define RECORD(name, altName, print, parse) print
-#include "AMDKernelCodeTInfo.h"
-#undef RECORD
-  };
-  return ArrayRef(Table);
-}
-
-void llvm::printAmdKernelCodeField(const amd_kernel_code_t &C,
-                                   int FldIndex,
-                                   raw_ostream &OS) {
-  auto Printer = getPrinterTable()[FldIndex];
-  if (Printer)
-    Printer(get_amd_kernel_code_t_FieldName(FldIndex), C, OS);
-}
-
-void llvm::dumpAmdKernelCode(const amd_kernel_code_t *C,
-                             raw_ostream &OS,
-                             const char *tab) {
-  const int Size = getPrinterTable().size();
-  for (int i = 0; i < Size; ++i) {
-    OS << tab;
-    printAmdKernelCodeField(*C, i, OS);
-    OS << '\n';
-  }
-}
-
-// Field parsing
-
-static bool expectAbsExpression(MCAsmParser &MCParser, int64_t &Value, raw_ostream& Err) {
-
-  if (MCParser.getLexer().isNot(AsmToken::Equal)) {
-    Err << "expected '='";
-    return false;
-  }
-  MCParser.getLexer().Lex();
-
-  if (MCParser.parseAbsoluteExpression(Value)) {
-    Err << "integer absolute expression expected";
-    return false;
-  }
-  return true;
-}
-
-template <typename T, T amd_kernel_code_t::*ptr>
-static bool parseField(amd_kernel_code_t &C, MCAsmParser &MCParser,
-                       raw_ostream &Err) {
-  int64_t Value = 0;
-  if (!expectAbsExpression(MCParser, Value, Err))
-    return false;
-  C.*ptr = (T)Value;
-  return true;
-}
-
-template <typename T, T amd_kernel_code_t::*ptr, int shift, int width = 1>
-static bool parseBitField(amd_kernel_code_t &C, MCAsmParser &MCParser,
-                          raw_ostream &Err) {
-  int64_t Value = 0;
-  if (!expectAbsExpression(MCParser, Value, Err))
-    return false;
-  const uint64_t Mask = ((UINT64_C(1)  << width) - 1) << shift;
-  C.*ptr &= (T)~Mask;
-  C.*ptr |= (T)((Value << shift) & Mask);
-  return true;
-}
-
-using ParseFx = bool(*)(amd_kernel_code_t &, MCAsmParser &MCParser,
-                        raw_ostream &Err);
-
-static ArrayRef<ParseFx> getParserTable() {
-  static const ParseFx Table[] = {
-#define RECORD(name, altName, print, parse) parse
-#include "AMDKernelCodeTInfo.h"
-#undef RECORD
-  };
-  return ArrayRef(Table);
-}
-
-bool llvm::parseAmdKernelCodeField(StringRef ID,
-                                   MCAsmParser &MCParser,
-                                   amd_kernel_code_t &C,
-                                   raw_ostream &Err) {
-  const int Idx = get_amd_kernel_code_t_FieldIndex(ID);
-  if (Idx < 0) {
-    Err << "unexpected amd_kernel_code_t field name " << ID;
-    return false;
-  }
-  auto Parser = getParserTable()[Idx];
-  return Parser ? Parser(C, MCParser, Err) : false;
-}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
deleted file mode 100644
index 41d0e0d745e5e..0000000000000
--- a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
+++ /dev/null
@@ -1,35 +0,0 @@
-//===- AMDGPUKernelCodeTUtils.h - helpers for amd_kernel_code_t -*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file AMDKernelCodeTUtils.h
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDKERNELCODETUTILS_H
-#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDKERNELCODETUTILS_H
-
-struct amd_kernel_code_t;
-
-namespace llvm {
-
-class MCAsmParser;
-class raw_ostream;
-class StringRef;
-
-void printAmdKernelCodeField(const amd_kernel_code_t &C, int FldIndex,
-                             raw_ostream &OS);
-
-void dumpAmdKernelCode(const amd_kernel_code_t *C, raw_ostream &OS,
-                       const char *tab);
-
-bool parseAmdKernelCodeField(StringRef ID, MCAsmParser &Parser,
-                             amd_kernel_code_t &C, raw_ostream &Err);
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDKERNELCODETUTILS_H
diff --git a/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt b/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt
index 19d3b690b1315..814d584c149f0 100644
--- a/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt
@@ -3,7 +3,6 @@ add_llvm_component_library(LLVMAMDGPUUtils
   AMDGPUBaseInfo.cpp
   AMDGPUMemoryUtils.cpp
   AMDGPUPALMetadata.cpp
-  AMDKernelCodeTUtils.cpp
 
   LINK_COMPONENTS
   Analysis

>From 68961589a74346101490158fe5cc6b8122e71910 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <Janek.vanOirschot at amd.com>
Date: Fri, 17 May 2024 06:58:34 -0700
Subject: [PATCH 5/5] Feedback, move destination files of AMDGPUMCKernelCodeT
 struct, overwrite all members (and change table-driven strategy to conform
 to that)

---
 llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp   |  38 ++---
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp      |   9 +-
 .../MCTargetDesc/AMDGPUTargetStreamer.cpp     |   2 +-
 .../Target/AMDGPU/MCTargetDesc/CMakeLists.txt |   2 -
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    |  69 +++++---
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h |   3 +-
 .../Target/AMDGPU/Utils/AMDKernelCodeTInfo.h  |  11 +-
 .../AMDKernelCodeTUtils.cpp}                  | 160 +++++++++++-------
 .../AMDKernelCodeTUtils.h}                    |  38 ++++-
 llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt   |   2 +
 10 files changed, 212 insertions(+), 122 deletions(-)
 rename llvm/lib/Target/AMDGPU/{MCTargetDesc/AMDGPUMCKernelCodeT.cpp => Utils/AMDKernelCodeTUtils.cpp} (78%)
 rename llvm/lib/Target/AMDGPU/{MCTargetDesc/AMDGPUMCKernelCodeT.h => Utils/AMDKernelCodeTUtils.h} (57%)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index f4a5bd10d5579..6ff150a0570e1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -22,13 +22,13 @@
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUInstPrinter.h"
 #include "MCTargetDesc/AMDGPUMCExpr.h"
-#include "MCTargetDesc/AMDGPUMCKernelCodeT.h"
 #include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
 #include "R600AsmPrinter.h"
 #include "SIMachineFunctionInfo.h"
 #include "TargetInfo/AMDGPUTargetInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
+#include "Utils/AMDKernelCodeTUtils.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -1332,13 +1332,13 @@ void AMDGPUAsmPrinter::getAmdKernelCode(AMDGPUMCKernelCodeT &Out,
   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
   MCContext &Ctx = MF.getContext();
 
-  AMDGPU::initDefaultAMDKernelCodeT(Out.KernelCode, &STM);
+  AMDGPU::initDefaultAMDKernelCodeT(Out, &STM);
 
   Out.compute_pgm_resource1_registers =
       CurrentProgramInfo.getComputePGMRSrc1(STM, Ctx);
   Out.compute_pgm_resource2_registers =
       CurrentProgramInfo.getComputePGMRSrc2(Ctx);
-  Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
+  Out.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
 
   {
     const MCExpr *Shift = MCConstantExpr::create(
@@ -1347,54 +1347,46 @@ void AMDGPUAsmPrinter::getAmdKernelCode(AMDGPUMCKernelCodeT &Out,
         CurrentProgramInfo.DynamicCallStack, Shift, Ctx);
   }
 
-  AMD_HSA_BITS_SET(Out.KernelCode.code_properties,
-                   AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
+  AMD_HSA_BITS_SET(Out.code_properties, AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
                    getElementByteSizeValue(STM.getMaxPrivateElementSize(true)));
 
   const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI->getUserSGPRInfo();
   if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
-    Out.KernelCode.code_properties |=
-        AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
+    Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
   }
 
   if (UserSGPRInfo.hasDispatchPtr())
-    Out.KernelCode.code_properties |=
-        AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+    Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
 
   if (UserSGPRInfo.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5)
-    Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
+    Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
 
   if (UserSGPRInfo.hasKernargSegmentPtr())
-    Out.KernelCode.code_properties |=
-        AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
+    Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
 
   if (UserSGPRInfo.hasDispatchID())
-    Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
+    Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
 
   if (UserSGPRInfo.hasFlatScratchInit())
-    Out.KernelCode.code_properties |=
-        AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
+    Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
 
   if (UserSGPRInfo.hasDispatchPtr())
-    Out.KernelCode.code_properties |=
-        AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+    Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
 
   if (STM.isXNACKEnabled())
-    Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
+    Out.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
 
   Align MaxKernArgAlign;
-  Out.KernelCode.kernarg_segment_byte_size =
-      STM.getKernArgSegmentSize(F, MaxKernArgAlign);
+  Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
   Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
   Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
   Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
-  Out.KernelCode.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
+  Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
 
   // kernarg_segment_alignment is specified as log of the alignment.
   // The minimum alignment is 16.
   // FIXME: The metadata treats the minimum as 4?
-  Out.KernelCode.kernarg_segment_alignment =
-      Log2(std::max(Align(16), MaxKernArgAlign));
+  Out.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign));
 }
 
 bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 1e81efd0b64bd..8b1c1c53de30d 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -8,7 +8,6 @@
 
 #include "AMDKernelCodeT.h"
 #include "MCTargetDesc/AMDGPUMCExpr.h"
-#include "MCTargetDesc/AMDGPUMCKernelCodeT.h"
 #include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
@@ -18,6 +17,7 @@
 #include "TargetInfo/AMDGPUTargetInfo.h"
 #include "Utils/AMDGPUAsmUtils.h"
 #include "Utils/AMDGPUBaseInfo.h"
+#include "Utils/AMDKernelCodeTUtils.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/StringSet.h"
@@ -5889,8 +5889,7 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
   Lex();
 
   if (ID == "enable_wavefront_size32") {
-    if (C.KernelCode.code_properties &
-        AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
+    if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
       if (!isGFX10Plus())
         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
@@ -5902,12 +5901,12 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
   }
 
   if (ID == "wavefront_size") {
-    if (C.KernelCode.wavefront_size == 5) {
+    if (C.wavefront_size == 5) {
       if (!isGFX10Plus())
         return TokError("wavefront_size=5 is only allowed on GFX10+");
       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
         return TokError("wavefront_size=5 requires +WavefrontSize32");
-    } else if (C.KernelCode.wavefront_size == 6) {
+    } else if (C.wavefront_size == 6) {
       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
         return TokError("wavefront_size=6 requires +WavefrontSize64");
     }
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index f2a93c5ebaf18..00e64e3419ba0 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -11,10 +11,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPUTargetStreamer.h"
-#include "AMDGPUMCKernelCodeT.h"
 #include "AMDGPUMCKernelDescriptor.h"
 #include "AMDGPUPTNote.h"
 #include "Utils/AMDGPUBaseInfo.h"
+#include "Utils/AMDKernelCodeTUtils.h"
 #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCAssembler.h"
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
index 5ff44ee70afa6..14a02b6d8e368 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
@@ -8,7 +8,6 @@ add_llvm_component_library(LLVMAMDGPUDesc
   AMDGPUMCExpr.cpp
   AMDGPUMCTargetDesc.cpp
   AMDGPUTargetStreamer.cpp
-  AMDGPUMCKernelCodeT.cpp
   AMDGPUMCKernelDescriptor.cpp
   R600InstPrinter.cpp
   R600MCCodeEmitter.cpp
@@ -21,7 +20,6 @@ add_llvm_component_library(LLVMAMDGPUDesc
   CodeGenTypes
   Core
   MC
-  MCParser
   Support
   TargetParser
 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 2beaf903542bd..2ef7ed2737efe 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -11,6 +11,7 @@
 #include "AMDGPUAsmUtils.h"
 #include "AMDKernelCodeT.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "Utils/AMDKernelCodeTUtils.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/IR/Attributes.h"
@@ -1218,39 +1219,67 @@ unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI,
 }
 } // end namespace IsaInfo
 
-void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
+void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode,
                                const MCSubtargetInfo *STI) {
   IsaVersion Version = getIsaVersion(STI->getCPU());
 
-  memset(&Header, 0, sizeof(Header));
-
-  Header.amd_kernel_code_version_major = 1;
-  Header.amd_kernel_code_version_minor = 2;
-  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
-  Header.amd_machine_version_major = Version.Major;
-  Header.amd_machine_version_minor = Version.Minor;
-  Header.amd_machine_version_stepping = Version.Stepping;
-  Header.kernel_code_entry_byte_offset = sizeof(Header);
-  Header.wavefront_size = 6;
+  KernelCode.amd_kernel_code_version_major = 0;
+  KernelCode.amd_kernel_code_version_minor = 0;
+  KernelCode.amd_machine_kind = 0;
+  KernelCode.amd_machine_version_major = 0;
+  KernelCode.amd_machine_version_minor = 0;
+  KernelCode.amd_machine_version_stepping = 0;
+  KernelCode.kernel_code_prefetch_byte_offset = 0;
+  KernelCode.kernel_code_prefetch_byte_size = 0;
+  KernelCode.reserved0 = 0;
+  KernelCode.compute_pgm_resource_registers = 0;
+  KernelCode.code_properties = 0;
+  KernelCode.workgroup_group_segment_byte_size = 0;
+  KernelCode.gds_segment_byte_size = 0;
+  KernelCode.kernarg_segment_byte_size = 0;
+  KernelCode.workgroup_fbarrier_count = 0;
+  KernelCode.reserved_vgpr_first = 0;
+  KernelCode.reserved_vgpr_count = 0;
+  KernelCode.reserved_sgpr_first = 0;
+  KernelCode.reserved_sgpr_count = 0;
+  KernelCode.debug_wavefront_private_segment_offset_sgpr = 0;
+  KernelCode.debug_private_segment_buffer_sgpr = 0;
+  KernelCode.kernarg_segment_alignment = 0;
+  KernelCode.group_segment_alignment = 0;
+  KernelCode.private_segment_alignment = 0;
+  KernelCode.call_convention = 0;
+  memset(KernelCode.reserved3, 0, sizeof(KernelCode.reserved3));
+  KernelCode.runtime_loader_kernel_symbol = 0;
+  memset(KernelCode.control_directives, 0,
+         sizeof(KernelCode.control_directives));
+
+  KernelCode.amd_kernel_code_version_major = 1;
+  KernelCode.amd_kernel_code_version_minor = 2;
+  KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
+  KernelCode.amd_machine_version_major = Version.Major;
+  KernelCode.amd_machine_version_minor = Version.Minor;
+  KernelCode.amd_machine_version_stepping = Version.Stepping;
+  KernelCode.kernel_code_entry_byte_offset = sizeof(amd_kernel_code_t);
+  KernelCode.wavefront_size = 6;
 
   // If the code object does not support indirect functions, then the value must
   // be 0xffffffff.
-  Header.call_convention = -1;
+  KernelCode.call_convention = -1;
 
   // These alignment values are specified in powers of two, so alignment =
   // 2^n.  The minimum alignment is 2^4 = 16.
-  Header.kernarg_segment_alignment = 4;
-  Header.group_segment_alignment = 4;
-  Header.private_segment_alignment = 4;
+  KernelCode.kernarg_segment_alignment = 4;
+  KernelCode.group_segment_alignment = 4;
+  KernelCode.private_segment_alignment = 4;
 
   if (Version.Major >= 10) {
     if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
-      Header.wavefront_size = 5;
-      Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
+      KernelCode.wavefront_size = 5;
+      KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
     }
-    Header.compute_pgm_resource_registers |=
-      S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
-      S_00B848_MEM_ORDERED(1);
+    KernelCode.compute_pgm_resource_registers |=
+        S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
+        S_00B848_MEM_ORDERED(1);
   }
 }
 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index fc4147df76e3e..3cfc42a7d24d5 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -37,6 +37,7 @@ class raw_ostream;
 
 namespace AMDGPU {
 
+struct AMDGPUMCKernelCodeT;
 struct IsaVersion;
 
 /// Generic target versions emitted by this version of LLVM.
@@ -860,7 +861,7 @@ unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);
 LLVM_READONLY
 unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);
 
-void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
+void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &Header,
                                const MCSubtargetInfo *STI);
 
 bool isGroupSegment(const GlobalValue *GV);
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
index 2a9fa804bc898..75cb6cffbd51b 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
@@ -12,12 +12,16 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define QNAME(name) amd_kernel_code_t::name
+#define QNAME(name) AMDGPUMCKernelCodeT::name
 #define FLD_T(name) decltype(QNAME(name)), &QNAME(name)
 
+#ifndef PRINTFIELD
+#define PRINTFIELD(sname, aname, name) printField<FLD_T(name)>
+#endif
+
 #ifndef FIELD2
-#define FIELD2(sname, aname, name) \
-  RECORD(sname, aname, printField<FLD_T(name)>, parseField<FLD_T(name)>)
+#define FIELD2(sname, aname, name)                                             \
+  RECORD(sname, aname, PRINTFIELD(sname, aname, name), parseField<FLD_T(name)>)
 #endif
 
 #ifndef FIELD
@@ -163,6 +167,7 @@ FIELD(runtime_loader_kernel_symbol)
 
 #undef QNAME
 #undef FLD_T
+#undef PRINTFIELD
 #undef FIELD2
 #undef FIELD
 #undef PRINTCODEPROP
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
similarity index 78%
rename from llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
rename to llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
index 8e1d8e6154d21..bd815d7c8c012 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
@@ -1,12 +1,16 @@
-//===--- AMDHSAKernelCodeT.cpp --------------------------------------------===//
+//===- AMDKernelCodeTUtils.cpp --------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+//
+/// \file - utility functions to parse/print AMDGPUMCKernelCodeT structure
+//
+//===----------------------------------------------------------------------===//
 
-#include "AMDGPUMCKernelCodeT.h"
+#include "AMDKernelCodeTUtils.h"
 #include "AMDKernelCodeT.h"
 #include "SIDefines.h"
 #include "Utils/AMDGPUBaseInfo.h"
@@ -17,6 +21,7 @@
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
@@ -48,18 +53,32 @@ using namespace llvm::AMDGPU;
         std::is_same_v<decltype(Test<AmbiguousDerived>(nullptr)),              \
                        std::true_type>;                                        \
   };                                                                           \
+  class IsMCExpr##member {                                                     \
+    template <typename U,                                                      \
+              typename std::enable_if_t<                                       \
+                  HasMember##member::RESULT &&                                 \
+                      std::is_same_v<decltype(U::member), const MCExpr *>,     \
+                  U> * = nullptr>                                              \
+    static constexpr std::true_type HasMCExprType(decltype(U::member) *);      \
+    template <typename U> static constexpr std::false_type HasMCExprType(...); \
+                                                                               \
+  public:                                                                      \
+    static constexpr bool RESULT =                                             \
+        std::is_same_v<decltype(HasMCExprType<AMDGPUMCKernelCodeT>(nullptr)),  \
+                       std::true_type>;                                        \
+  };                                                                           \
   class GetMember##member {                                                    \
   public:                                                                      \
     static const MCExpr *Phony;                                                \
-    template <typename U, typename std::enable_if_t<HasMember##member::RESULT, \
+    template <typename U, typename std::enable_if_t<IsMCExpr##member::RESULT,  \
                                                     U> * = nullptr>            \
     static const MCExpr *&Get(U &C) {                                          \
-      assert(HasMember##member::RESULT &&                                      \
+      assert(IsMCExpr##member::RESULT &&                                       \
              "Trying to retrieve member that does not exist.");                \
       return C.member;                                                         \
     }                                                                          \
-    template <typename U, typename std::enable_if_t<                           \
-                              !HasMember##member::RESULT, U> * = nullptr>      \
+    template <typename U, typename std::enable_if_t<!IsMCExpr##member::RESULT, \
+                                                    U> * = nullptr>            \
     static const MCExpr *&Get(U &C) {                                          \
       return Phony;                                                            \
     }                                                                          \
@@ -164,7 +183,7 @@ static ArrayRef<StringLiteral> get_amd_kernel_code_t_FldAltNames() {
 
 static ArrayRef<bool> hasMCExprVersionTable() {
   static bool const Table[] = {
-#define RECORD(name, altName, print, parse) (HasMember##name::RESULT)
+#define RECORD(name, altName, print, parse) (IsMCExpr##name::RESULT)
 #include "Utils/AMDKernelCodeTInfo.h"
 #undef RECORD
   };
@@ -235,17 +254,34 @@ static const MCExpr *MaskShiftGet(const MCExpr *Val, uint32_t Mask,
   return Val;
 }
 
-template <typename T, T amd_kernel_code_t::*ptr>
-static void printField(StringRef Name, const AMDGPUMCKernelCodeT &C,
-                       raw_ostream &OS, MCContext &) {
-  OS << Name << " = " << (int)(C.KernelCode.*ptr);
-}
+class PrintField {
+public:
+  template <typename T, T AMDGPUMCKernelCodeT::*ptr,
+            typename std::enable_if_t<!std::is_integral_v<T>, T> * = nullptr>
+  static void printField(StringRef Name, const AMDGPUMCKernelCodeT &C,
+                         raw_ostream &OS, MCContext &Ctx) {
+    OS << Name << " = ";
+    const MCExpr *Value = C.*ptr;
+    int64_t Val;
+    if (Value->evaluateAsAbsolute(Val))
+      OS << Val;
+    else
+      Value->print(OS, Ctx.getAsmInfo());
+  }
+
+  template <typename T, T AMDGPUMCKernelCodeT::*ptr,
+            typename std::enable_if_t<std::is_integral_v<T>, T> * = nullptr>
+  static void printField(StringRef Name, const AMDGPUMCKernelCodeT &C,
+                         raw_ostream &OS, MCContext &) {
+    OS << Name << " = " << (int)(C.*ptr);
+  }
+};
 
-template <typename T, T amd_kernel_code_t::*ptr, int shift, int width = 1>
+template <typename T, T AMDGPUMCKernelCodeT::*ptr, int shift, int width = 1>
 static void printBitField(StringRef Name, const AMDGPUMCKernelCodeT &C,
                           raw_ostream &OS, MCContext &) {
   const auto Mask = (static_cast<T>(1) << width) - 1;
-  OS << Name << " = " << (int)((C.KernelCode.*ptr >> shift) & Mask);
+  OS << Name << " = " << (int)((C.*ptr >> shift) & Mask);
 }
 
 using PrintFx = void (*)(StringRef, const AMDGPUMCKernelCodeT &, raw_ostream &,
@@ -257,6 +293,7 @@ static ArrayRef<PrintFx> getPrinterTable() {
   COMPPGM(name, aname, C_00B848_##AccMacro, S_00B848_##AccMacro, 0)
 #define COMPPGM2(name, aname, AccMacro)                                        \
   COMPPGM(name, aname, C_00B84C_##AccMacro, S_00B84C_##AccMacro, 32)
+#define PRINTFIELD(sname, aname, name) PrintField::printField<FLD_T(name)>
 #define PRINTCOMP(Complement, PGMType)                                         \
   [](StringRef Name, const AMDGPUMCKernelCodeT &C, raw_ostream &OS,            \
      MCContext &Ctx) {                                                         \
@@ -299,25 +336,25 @@ static bool expectAbsExpression(MCAsmParser &MCParser, int64_t &Value,
   return true;
 }
 
-template <typename T, T amd_kernel_code_t::*ptr>
+template <typename T, T AMDGPUMCKernelCodeT::*ptr>
 static bool parseField(AMDGPUMCKernelCodeT &C, MCAsmParser &MCParser,
                        raw_ostream &Err) {
   int64_t Value = 0;
   if (!expectAbsExpression(MCParser, Value, Err))
     return false;
-  C.KernelCode.*ptr = (T)Value;
+  C.*ptr = (T)Value;
   return true;
 }
 
-template <typename T, T amd_kernel_code_t::*ptr, int shift, int width = 1>
+template <typename T, T AMDGPUMCKernelCodeT::*ptr, int shift, int width = 1>
 static bool parseBitField(AMDGPUMCKernelCodeT &C, MCAsmParser &MCParser,
                           raw_ostream &Err) {
   int64_t Value = 0;
   if (!expectAbsExpression(MCParser, Value, Err))
     return false;
   const uint64_t Mask = ((UINT64_C(1) << width) - 1) << shift;
-  C.KernelCode.*ptr &= (T)~Mask;
-  C.KernelCode.*ptr |= (T)((Value << shift) & Mask);
+  C.*ptr &= (T)~Mask;
+  C.*ptr |= (T)((Value << shift) & Mask);
   return true;
 }
 
@@ -383,12 +420,12 @@ static void printAmdKernelCodeField(const AMDGPUMCKernelCodeT &C, int FldIndex,
 
 void AMDGPUMCKernelCodeT::initDefault(const MCSubtargetInfo *STI,
                                       MCContext &Ctx) {
-  AMDGPU::initDefaultAMDKernelCodeT(KernelCode, STI);
+  AMDGPU::initDefaultAMDKernelCodeT(*this, STI);
   const MCExpr *ZeroExpr = MCConstantExpr::create(0, Ctx);
-  compute_pgm_resource1_registers = MCConstantExpr::create(
-      KernelCode.compute_pgm_resource_registers & 0xFFFFFFFF, Ctx);
-  compute_pgm_resource2_registers = MCConstantExpr::create(
-      (KernelCode.compute_pgm_resource_registers >> 32) & 0xffffffff, Ctx);
+  compute_pgm_resource1_registers =
+      MCConstantExpr::create(Lo_32(compute_pgm_resource_registers), Ctx);
+  compute_pgm_resource2_registers =
+      MCConstantExpr::create(Hi_32(compute_pgm_resource_registers), Ctx);
   is_dynamic_callstack = ZeroExpr;
   wavefront_sgpr_count = ZeroExpr;
   workitem_vgpr_count = ZeroExpr;
@@ -470,33 +507,31 @@ void AMDGPUMCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, MCContext &Ctx) {
 }
 
 void AMDGPUMCKernelCodeT::EmitKernelCodeT(MCStreamer &OS, MCContext &Ctx) {
-  OS.emitIntValue(KernelCode.amd_kernel_code_version_major, /*Size=*/4);
-  OS.emitIntValue(KernelCode.amd_kernel_code_version_minor, /*Size=*/4);
-  OS.emitIntValue(KernelCode.amd_machine_kind, /*Size=*/2);
-  OS.emitIntValue(KernelCode.amd_machine_version_major, /*Size=*/2);
-  OS.emitIntValue(KernelCode.amd_machine_version_minor, /*Size=*/2);
-  OS.emitIntValue(KernelCode.amd_machine_version_stepping, /*Size=*/2);
-  OS.emitIntValue(KernelCode.kernel_code_entry_byte_offset, /*Size=*/8);
-  OS.emitIntValue(KernelCode.kernel_code_prefetch_byte_offset, /*Size=*/8);
-  OS.emitIntValue(KernelCode.kernel_code_prefetch_byte_size, /*Size=*/8);
-  OS.emitIntValue(KernelCode.reserved0, /*Size=*/8);
+  OS.emitIntValue(amd_kernel_code_version_major, /*Size=*/4);
+  OS.emitIntValue(amd_kernel_code_version_minor, /*Size=*/4);
+  OS.emitIntValue(amd_machine_kind, /*Size=*/2);
+  OS.emitIntValue(amd_machine_version_major, /*Size=*/2);
+  OS.emitIntValue(amd_machine_version_minor, /*Size=*/2);
+  OS.emitIntValue(amd_machine_version_stepping, /*Size=*/2);
+  OS.emitIntValue(kernel_code_entry_byte_offset, /*Size=*/8);
+  OS.emitIntValue(kernel_code_prefetch_byte_offset, /*Size=*/8);
+  OS.emitIntValue(kernel_code_prefetch_byte_size, /*Size=*/8);
+  OS.emitIntValue(reserved0, /*Size=*/8);
 
   if (compute_pgm_resource1_registers != nullptr)
     OS.emitValue(compute_pgm_resource1_registers, /*Size=*/4);
   else
-    OS.emitIntValue(KernelCode.compute_pgm_resource_registers & 0xFFFFFFFF,
+    OS.emitIntValue(Lo_32(compute_pgm_resource_registers),
                     /*Size=*/4);
 
   if (compute_pgm_resource2_registers != nullptr)
     OS.emitValue(compute_pgm_resource2_registers, /*Size=*/4);
   else
-    OS.emitIntValue((KernelCode.compute_pgm_resource_registers >> 32) &
-                        0xFFFFFFFF,
+    OS.emitIntValue(Hi_32(compute_pgm_resource_registers),
                     /*Size=*/4);
 
   if (is_dynamic_callstack != nullptr) {
-    const MCExpr *CodeProps =
-        MCConstantExpr::create(KernelCode.code_properties, Ctx);
+    const MCExpr *CodeProps = MCConstantExpr::create(code_properties, Ctx);
     CodeProps = MCBinaryExpr::createOr(
         CodeProps,
         MaskShiftSet(is_dynamic_callstack,
@@ -505,43 +540,42 @@ void AMDGPUMCKernelCodeT::EmitKernelCodeT(MCStreamer &OS, MCContext &Ctx) {
         Ctx);
     OS.emitValue(CodeProps, /*Size=*/4);
   } else
-    OS.emitIntValue(KernelCode.code_properties, /*Size=*/4);
+    OS.emitIntValue(code_properties, /*Size=*/4);
 
   if (workitem_private_segment_byte_size != nullptr)
     OS.emitValue(workitem_private_segment_byte_size, /*Size=*/4);
   else
-    OS.emitIntValue(KernelCode.workitem_private_segment_byte_size, /*Size=*/4);
+    OS.emitIntValue(0, /*Size=*/4);
 
-  OS.emitIntValue(KernelCode.workgroup_group_segment_byte_size, /*Size=*/4);
-  OS.emitIntValue(KernelCode.gds_segment_byte_size, /*Size=*/4);
-  OS.emitIntValue(KernelCode.kernarg_segment_byte_size, /*Size=*/8);
-  OS.emitIntValue(KernelCode.workgroup_fbarrier_count, /*Size=*/4);
+  OS.emitIntValue(workgroup_group_segment_byte_size, /*Size=*/4);
+  OS.emitIntValue(gds_segment_byte_size, /*Size=*/4);
+  OS.emitIntValue(kernarg_segment_byte_size, /*Size=*/8);
+  OS.emitIntValue(workgroup_fbarrier_count, /*Size=*/4);
 
   if (wavefront_sgpr_count != nullptr)
     OS.emitValue(wavefront_sgpr_count, /*Size=*/2);
   else
-    OS.emitIntValue(KernelCode.wavefront_sgpr_count, /*Size=*/2);
+    OS.emitIntValue(0, /*Size=*/2);
 
   if (workitem_vgpr_count != nullptr)
     OS.emitValue(workitem_vgpr_count, /*Size=*/2);
   else
-    OS.emitIntValue(KernelCode.workitem_vgpr_count, /*Size=*/2);
+    OS.emitIntValue(0, /*Size=*/2);
 
-  OS.emitIntValue(KernelCode.reserved_vgpr_first, /*Size=*/2);
-  OS.emitIntValue(KernelCode.reserved_vgpr_count, /*Size=*/2);
-  OS.emitIntValue(KernelCode.reserved_sgpr_first, /*Size=*/2);
-  OS.emitIntValue(KernelCode.reserved_sgpr_count, /*Size=*/2);
-  OS.emitIntValue(KernelCode.debug_wavefront_private_segment_offset_sgpr,
+  OS.emitIntValue(reserved_vgpr_first, /*Size=*/2);
+  OS.emitIntValue(reserved_vgpr_count, /*Size=*/2);
+  OS.emitIntValue(reserved_sgpr_first, /*Size=*/2);
+  OS.emitIntValue(reserved_sgpr_count, /*Size=*/2);
+  OS.emitIntValue(debug_wavefront_private_segment_offset_sgpr,
                   /*Size=*/2);
-  OS.emitIntValue(KernelCode.debug_private_segment_buffer_sgpr, /*Size=*/2);
-  OS.emitIntValue(KernelCode.kernarg_segment_alignment, /*Size=*/1);
-  OS.emitIntValue(KernelCode.group_segment_alignment, /*Size=*/1);
-  OS.emitIntValue(KernelCode.private_segment_alignment, /*Size=*/1);
-  OS.emitIntValue(KernelCode.wavefront_size, /*Size=*/1);
-
-  OS.emitIntValue(KernelCode.call_convention, /*Size=*/4);
-  OS.emitBytes(StringRef((const char *)KernelCode.reserved3, /*Size=*/12));
-  OS.emitIntValue(KernelCode.runtime_loader_kernel_symbol, /*Size=*/8);
-  OS.emitBytes(
-      StringRef((const char *)KernelCode.control_directives, /*Size=*/16 * 8));
+  OS.emitIntValue(debug_private_segment_buffer_sgpr, /*Size=*/2);
+  OS.emitIntValue(kernarg_segment_alignment, /*Size=*/1);
+  OS.emitIntValue(group_segment_alignment, /*Size=*/1);
+  OS.emitIntValue(private_segment_alignment, /*Size=*/1);
+  OS.emitIntValue(wavefront_size, /*Size=*/1);
+
+  OS.emitIntValue(call_convention, /*Size=*/4);
+  OS.emitBytes(StringRef((const char *)reserved3, /*Size=*/12));
+  OS.emitIntValue(runtime_loader_kernel_symbol, /*Size=*/8);
+  OS.emitBytes(StringRef((const char *)control_directives, /*Size=*/16 * 8));
 }
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
similarity index 57%
rename from llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h
rename to llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
index f1f61f130f944..5577d300fd0b9 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
@@ -1,4 +1,4 @@
-//===--- AMDGPUMCKernelCodeT.h --------------------------------*- C++ -*---===//
+//===- AMDKernelCodeTUtils.h - helpers for amd_kernel_code_t ----*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 //
-/// \file
-/// MC layer struct for amd_kernel_code_t, provides MCExpr functionality where
+/// \file AMDKernelCodeTUtils.h
+/// MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where
 /// required.
 ///
 //
@@ -32,7 +32,37 @@ namespace AMDGPU {
 struct AMDGPUMCKernelCodeT {
   AMDGPUMCKernelCodeT() = default;
 
-  amd_kernel_code_t KernelCode;
+  uint32_t amd_kernel_code_version_major;
+  uint32_t amd_kernel_code_version_minor;
+  uint16_t amd_machine_kind;
+  uint16_t amd_machine_version_major;
+  uint16_t amd_machine_version_minor;
+  uint16_t amd_machine_version_stepping;
+  int64_t kernel_code_entry_byte_offset;
+  int64_t kernel_code_prefetch_byte_offset;
+  uint64_t kernel_code_prefetch_byte_size;
+  uint64_t reserved0;
+  uint64_t compute_pgm_resource_registers;
+  uint32_t code_properties;
+  uint32_t workgroup_group_segment_byte_size;
+  uint32_t gds_segment_byte_size;
+  uint64_t kernarg_segment_byte_size;
+  uint32_t workgroup_fbarrier_count;
+  uint16_t reserved_vgpr_first;
+  uint16_t reserved_vgpr_count;
+  uint16_t reserved_sgpr_first;
+  uint16_t reserved_sgpr_count;
+  uint16_t debug_wavefront_private_segment_offset_sgpr;
+  uint16_t debug_private_segment_buffer_sgpr;
+  uint8_t kernarg_segment_alignment;
+  uint8_t group_segment_alignment;
+  uint8_t private_segment_alignment;
+  uint8_t wavefront_size;
+  int32_t call_convention;
+  uint8_t reserved3[12];
+  uint64_t runtime_loader_kernel_symbol;
+  uint64_t control_directives[16];
+
   const MCExpr *compute_pgm_resource1_registers = nullptr;
   const MCExpr *compute_pgm_resource2_registers = nullptr;
 
diff --git a/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt b/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt
index 814d584c149f0..2f4ce8eaf1d60 100644
--- a/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt
@@ -3,6 +3,7 @@ add_llvm_component_library(LLVMAMDGPUUtils
   AMDGPUBaseInfo.cpp
   AMDGPUMemoryUtils.cpp
   AMDGPUPALMetadata.cpp
+  AMDKernelCodeTUtils.cpp
 
   LINK_COMPONENTS
   Analysis
@@ -10,6 +11,7 @@ add_llvm_component_library(LLVMAMDGPUUtils
   CodeGenTypes
   Core
   MC
+  MCParser
   Support
   TargetParser
 



More information about the llvm-commits mailing list