[llvm] MCExpr-ify amd_kernel_code_t (PR #91587)
Janek van Oirschot via llvm-commits
llvm-commits at lists.llvm.org
Fri May 17 07:00:08 PDT 2024
https://github.com/JanekvO updated https://github.com/llvm/llvm-project/pull/91587
>From f2ea08b52bf33305571ddf13402672645397d81b Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Thu, 9 May 2024 13:33:47 +0100
Subject: [PATCH 1/5] MCExpr-ify amd_kernel_code_t
---
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 56 +-
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h | 5 +-
.../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 54 +-
.../MCTargetDesc/AMDGPUMCKernelCodeT.cpp | 549 ++++++++++++++++++
.../AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h | 59 ++
.../MCTargetDesc/AMDGPUTargetStreamer.cpp | 13 +-
.../MCTargetDesc/AMDGPUTargetStreamer.h | 9 +-
.../Target/AMDGPU/MCTargetDesc/CMakeLists.txt | 2 +
llvm/lib/Target/AMDGPU/SIDefines.h | 2 +-
.../Target/AMDGPU/Utils/AMDKernelCodeTInfo.h | 24 +-
llvm/test/MC/AMDGPU/amd_kernel_code_t.s | 171 ++++++
11 files changed, 858 insertions(+), 86 deletions(-)
create mode 100644 llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
create mode 100644 llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h
create mode 100644 llvm/test/MC/AMDGPU/amd_kernel_code_t.s
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index de81904143b7b..8343d3d83d22e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -19,10 +19,10 @@
#include "AMDGPU.h"
#include "AMDGPUHSAMetadataStreamer.h"
#include "AMDGPUResourceUsageAnalysis.h"
-#include "AMDKernelCodeT.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
+#include "MCTargetDesc/AMDGPUMCKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "R600AsmPrinter.h"
@@ -205,8 +205,9 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
if (STM.isMesaKernel(F) &&
(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
- amd_kernel_code_t KernelCode;
+ MCKernelCodeT KernelCode;
getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);
+ KernelCode.validate(&STM, MF->getContext());
getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
}
@@ -1320,7 +1321,7 @@ static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
}
}
-void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
+void AMDGPUAsmPrinter::getAmdKernelCode(MCKernelCodeT &Out,
const SIProgramInfo &CurrentProgramInfo,
const MachineFunction &MF) const {
const Function &F = MF.getFunction();
@@ -1331,59 +1332,62 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
MCContext &Ctx = MF.getContext();
- AMDGPU::initDefaultAMDKernelCodeT(Out, &STM);
+ AMDGPU::initDefaultAMDKernelCodeT(Out.KernelCode, &STM);
- Out.compute_pgm_resource_registers =
- CurrentProgramInfo.getComputePGMRSrc1(STM) |
- (CurrentProgramInfo.getComputePGMRSrc2() << 32);
- Out.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
+ Out.compute_pgm_resource1_registers =
+ CurrentProgramInfo.getComputePGMRSrc1(STM, Ctx);
+ Out.compute_pgm_resource2_registers =
+ CurrentProgramInfo.getComputePGMRSrc2(Ctx);
+ Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
- if (getMCExprValue(CurrentProgramInfo.DynamicCallStack, Ctx))
- Out.code_properties |= AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK;
+  // Keep the raw, unshifted flag here. MCKernelCodeT::EmitKernelCodeT masks
+  // is_dynamic_callstack with a 1-bit mask and shifts it by
+  // AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT itself when folding it into
+  // code_properties. Pre-shifting the value here would apply the shift twice,
+  // and the mask would then always clear the flag.
+  Out.is_dynamic_callstack = CurrentProgramInfo.DynamicCallStack;
- AMD_HSA_BITS_SET(Out.code_properties,
+ AMD_HSA_BITS_SET(Out.KernelCode.code_properties,
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
getElementByteSizeValue(STM.getMaxPrivateElementSize(true)));
const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI->getUserSGPRInfo();
if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
- Out.code_properties |=
+ Out.KernelCode.code_properties |=
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
}
if (UserSGPRInfo.hasDispatchPtr())
- Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+ Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
if (UserSGPRInfo.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5)
- Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
+ Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
if (UserSGPRInfo.hasKernargSegmentPtr())
- Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
+ Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
if (UserSGPRInfo.hasDispatchID())
- Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
+ Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
if (UserSGPRInfo.hasFlatScratchInit())
- Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
+ Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
if (UserSGPRInfo.hasDispatchPtr())
- Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+ Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
if (STM.isXNACKEnabled())
- Out.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
+ Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
Align MaxKernArgAlign;
- Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
- Out.wavefront_sgpr_count = getMCExprValue(CurrentProgramInfo.NumSGPR, Ctx);
- Out.workitem_vgpr_count = getMCExprValue(CurrentProgramInfo.NumVGPR, Ctx);
- Out.workitem_private_segment_byte_size =
- getMCExprValue(CurrentProgramInfo.ScratchSize, Ctx);
- Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
+ Out.KernelCode.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
+ Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
+ Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
+ Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
+ Out.KernelCode.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
// kernarg_segment_alignment is specified as log of the alignment.
// The minimum alignment is 16.
// FIXME: The metadata treats the minimum as 4?
- Out.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign));
+ Out.KernelCode.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign));
}
bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 16d8952a533ef..c5abbd3c8c084 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -17,8 +17,6 @@
#include "SIProgramInfo.h"
#include "llvm/CodeGen/AsmPrinter.h"
-struct amd_kernel_code_t;
-
namespace llvm {
class AMDGPUMachineFunction;
@@ -29,6 +27,7 @@ class MCOperand;
namespace AMDGPU {
struct MCKernelDescriptor;
+struct MCKernelCodeT;
namespace HSAMD {
class MetadataStreamer;
}
@@ -50,7 +49,7 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
uint64_t getFunctionCodeSize(const MachineFunction &MF) const;
void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF);
- void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo,
+ void getAmdKernelCode(AMDGPU::MCKernelCodeT &Out, const SIProgramInfo &KernelInfo,
const MachineFunction &MF) const;
/// Emit register usage information so that the GPU driver
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index d47a5f8ebb815..b8bdf816a9932 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -8,6 +8,7 @@
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
+#include "MCTargetDesc/AMDGPUMCKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
@@ -1340,7 +1341,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool ParseDirectiveAMDGCNTarget();
bool ParseDirectiveAMDHSACodeObjectVersion();
bool ParseDirectiveAMDHSAKernel();
- bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
+ bool ParseAMDKernelCodeTValue(StringRef ID, MCKernelCodeT &Header);
bool ParseDirectiveAMDKernelCodeT();
// TODO: Possibly make subtargetHasRegister const.
bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
@@ -5872,8 +5873,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
return false;
}
-bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
- amd_kernel_code_t &Header) {
+bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, MCKernelCodeT &C) {
// max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
// assembly for backwards compatibility.
if (ID == "max_scratch_backing_memory_byte_size") {
@@ -5883,25 +5883,14 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
SmallString<40> ErrStr;
raw_svector_ostream Err(ErrStr);
- if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
+ if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
return TokError(Err.str());
}
Lex();
- if (ID == "enable_dx10_clamp") {
- if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) &&
- isGFX12Plus())
- return TokError("enable_dx10_clamp=1 is not allowed on GFX12+");
- }
-
- if (ID == "enable_ieee_mode") {
- if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) &&
- isGFX12Plus())
- return TokError("enable_ieee_mode=1 is not allowed on GFX12+");
- }
-
if (ID == "enable_wavefront_size32") {
- if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
+ if (C.KernelCode.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
if (!isGFX10Plus())
return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
@@ -5913,41 +5902,23 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
}
if (ID == "wavefront_size") {
- if (Header.wavefront_size == 5) {
+ if (C.KernelCode.wavefront_size == 5) {
if (!isGFX10Plus())
return TokError("wavefront_size=5 is only allowed on GFX10+");
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
return TokError("wavefront_size=5 requires +WavefrontSize32");
- } else if (Header.wavefront_size == 6) {
+ } else if (C.KernelCode.wavefront_size == 6) {
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
return TokError("wavefront_size=6 requires +WavefrontSize64");
}
}
- if (ID == "enable_wgp_mode") {
- if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
- !isGFX10Plus())
- return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
- }
-
- if (ID == "enable_mem_ordered") {
- if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
- !isGFX10Plus())
- return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
- }
-
- if (ID == "enable_fwd_progress") {
- if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
- !isGFX10Plus())
- return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
- }
-
return false;
}
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
- amd_kernel_code_t Header;
- AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
+ MCKernelCodeT KernelCode;
+ KernelCode.initDefault(&getSTI(), getContext());
while (true) {
// Lex EndOfStatement. This is in a while loop, because lexing a comment
@@ -5961,11 +5932,12 @@ bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
if (ID == ".end_amd_kernel_code_t")
break;
- if (ParseAMDKernelCodeTValue(ID, Header))
+ if (ParseAMDKernelCodeTValue(ID, KernelCode))
return true;
}
- getTargetStreamer().EmitAMDKernelCodeT(Header);
+ KernelCode.validate(&getSTI(), getContext());
+ getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
new file mode 100644
index 0000000000000..7c081d98dadbf
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
@@ -0,0 +1,549 @@
+//===--- AMDGPUMCKernelCodeT.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCKernelCodeT.h"
+#include "AMDKernelCodeT.h"
+#include "SIDefines.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <array>
+#include <functional>
+
+using namespace llvm;
+using namespace llvm::AMDGPU;
+
+// Generates two helper classes for a (potential) MCKernelCodeT member named
+// `member`:
+// - HasMember##member
+//   RESULT is true iff MCKernelCodeT itself declares `member`. Detection works
+//   by deriving AmbiguousDerived from both MCKernelCodeT and a struct that is
+//   known to declare `member`: if MCKernelCodeT declares it too, the lookup of
+//   U::member is ambiguous, the first Test overload is discarded and the
+//   variadic overload (std::true_type) is selected. This identifies which of
+//   the original amd_kernel_code_t fields are duplicated as MCExpr members
+//   (if the names don't match, the table driven strategy won't work).
+// - GetMember##member
+//   Get(C) returns a reference (const MCExpr *&) to C.member when the member
+//   exists. Otherwise it returns a reference to the static `Phony` pointer,
+//   which is initialized to nullptr, so that both overloads can return a
+//   reference.
+#define GEN_HAS_MEMBER(member) \
+ class HasMember##member { \
+ private: \
+ struct KnownWithMember { \
+ int member; \
+ }; \
+ class AmbiguousDerived : public MCKernelCodeT, public KnownWithMember {}; \
+ template <typename U> \
+ static constexpr std::false_type Test(decltype(U::member) *); \
+ template <typename U> static constexpr std::true_type Test(...); \
+ \
+ public: \
+ static constexpr bool RESULT = \
+ std::is_same_v<decltype(Test<AmbiguousDerived>(nullptr)), \
+ std::true_type>; \
+ }; \
+ class GetMember##member { \
+ public: \
+ static const MCExpr *Phony; \
+ template <typename U, typename std::enable_if_t<HasMember##member::RESULT, \
+ U> * = nullptr> \
+ static const MCExpr *&Get(U &C) { \
+ assert(HasMember##member::RESULT && \
+ "Trying to retrieve member that does not exist."); \
+ return C.member; \
+ } \
+ template <typename U, typename std::enable_if_t< \
+ !HasMember##member::RESULT, U> * = nullptr> \
+ static const MCExpr *&Get(U &C) { \
+ return Phony; \
+ } \
+ }; \
+ const MCExpr *GetMember##member::Phony = nullptr;
+
+// The helper classes cannot be generated with the table driven approach (see
+// the table in AMDKernelCodeTInfo.h), so every field is listed by hand. If an
+// entry is missing here, or a field is later added to the table, the table
+// expansions below (e.g. in getMCExprIndexTable) refer to an undeclared
+// HasMember/GetMember class and fail to compile.
+GEN_HAS_MEMBER(amd_code_version_major)
+GEN_HAS_MEMBER(amd_code_version_minor)
+GEN_HAS_MEMBER(amd_machine_kind)
+GEN_HAS_MEMBER(amd_machine_version_major)
+GEN_HAS_MEMBER(amd_machine_version_minor)
+GEN_HAS_MEMBER(amd_machine_version_stepping)
+
+GEN_HAS_MEMBER(kernel_code_entry_byte_offset)
+GEN_HAS_MEMBER(kernel_code_prefetch_byte_size)
+
+GEN_HAS_MEMBER(granulated_workitem_vgpr_count)
+GEN_HAS_MEMBER(granulated_wavefront_sgpr_count)
+GEN_HAS_MEMBER(priority)
+GEN_HAS_MEMBER(float_mode)
+GEN_HAS_MEMBER(priv)
+GEN_HAS_MEMBER(enable_dx10_clamp)
+GEN_HAS_MEMBER(debug_mode)
+GEN_HAS_MEMBER(enable_ieee_mode)
+GEN_HAS_MEMBER(enable_wgp_mode)
+GEN_HAS_MEMBER(enable_mem_ordered)
+GEN_HAS_MEMBER(enable_fwd_progress)
+
+GEN_HAS_MEMBER(enable_sgpr_private_segment_wave_byte_offset)
+GEN_HAS_MEMBER(user_sgpr_count)
+GEN_HAS_MEMBER(enable_trap_handler)
+GEN_HAS_MEMBER(enable_sgpr_workgroup_id_x)
+GEN_HAS_MEMBER(enable_sgpr_workgroup_id_y)
+GEN_HAS_MEMBER(enable_sgpr_workgroup_id_z)
+GEN_HAS_MEMBER(enable_sgpr_workgroup_info)
+GEN_HAS_MEMBER(enable_vgpr_workitem_id)
+GEN_HAS_MEMBER(enable_exception_msb)
+GEN_HAS_MEMBER(granulated_lds_size)
+GEN_HAS_MEMBER(enable_exception)
+
+GEN_HAS_MEMBER(enable_sgpr_private_segment_buffer)
+GEN_HAS_MEMBER(enable_sgpr_dispatch_ptr)
+GEN_HAS_MEMBER(enable_sgpr_queue_ptr)
+GEN_HAS_MEMBER(enable_sgpr_kernarg_segment_ptr)
+GEN_HAS_MEMBER(enable_sgpr_dispatch_id)
+GEN_HAS_MEMBER(enable_sgpr_flat_scratch_init)
+GEN_HAS_MEMBER(enable_sgpr_private_segment_size)
+GEN_HAS_MEMBER(enable_sgpr_grid_workgroup_count_x)
+GEN_HAS_MEMBER(enable_sgpr_grid_workgroup_count_y)
+GEN_HAS_MEMBER(enable_sgpr_grid_workgroup_count_z)
+GEN_HAS_MEMBER(enable_wavefront_size32)
+GEN_HAS_MEMBER(enable_ordered_append_gds)
+GEN_HAS_MEMBER(private_element_size)
+GEN_HAS_MEMBER(is_ptr64)
+GEN_HAS_MEMBER(is_dynamic_callstack)
+GEN_HAS_MEMBER(is_debug_enabled)
+GEN_HAS_MEMBER(is_xnack_enabled)
+
+GEN_HAS_MEMBER(workitem_private_segment_byte_size)
+GEN_HAS_MEMBER(workgroup_group_segment_byte_size)
+GEN_HAS_MEMBER(gds_segment_byte_size)
+GEN_HAS_MEMBER(kernarg_segment_byte_size)
+GEN_HAS_MEMBER(workgroup_fbarrier_count)
+GEN_HAS_MEMBER(wavefront_sgpr_count)
+GEN_HAS_MEMBER(workitem_vgpr_count)
+GEN_HAS_MEMBER(reserved_vgpr_first)
+GEN_HAS_MEMBER(reserved_vgpr_count)
+GEN_HAS_MEMBER(reserved_sgpr_first)
+GEN_HAS_MEMBER(reserved_sgpr_count)
+GEN_HAS_MEMBER(debug_wavefront_private_segment_offset_sgpr)
+GEN_HAS_MEMBER(debug_private_segment_buffer_sgpr)
+GEN_HAS_MEMBER(kernarg_segment_alignment)
+GEN_HAS_MEMBER(group_segment_alignment)
+GEN_HAS_MEMBER(private_segment_alignment)
+GEN_HAS_MEMBER(wavefront_size)
+GEN_HAS_MEMBER(call_convention)
+GEN_HAS_MEMBER(runtime_loader_kernel_symbol)
+
+// Returns the table of primary field names: the stringified `name` column of
+// every RECORD in Utils/AMDKernelCodeTInfo.h, preceded by an empty "not found"
+// placeholder. Because of the placeholder, the name of the field with index I
+// lives at position I + 1.
+static ArrayRef<StringRef> get_amd_kernel_code_t_FldNames() {
+ static StringRef const Table[] = {
+ "", // not found placeholder
+#define RECORD(name, altName, print, parse) #name
+#include "Utils/AMDKernelCodeTInfo.h"
+#undef RECORD
+ };
+ return ArrayRef(Table);
+}
+
+// Returns the table of alternative field names: the stringified `altName`
+// column of every RECORD in Utils/AMDKernelCodeTInfo.h, preceded by the same
+// empty "not found" placeholder as the primary name table so that both tables
+// share their indexing.
+static ArrayRef<StringRef> get_amd_kernel_code_t_FldAltNames() {
+ static StringRef const Table[] = {
+ "", // not found placeholder
+#define RECORD(name, altName, print, parse) #altName
+#include "Utils/AMDKernelCodeTInfo.h"
+#undef RECORD
+ };
+ return ArrayRef(Table);
+}
+
+// Returns, for every RECORD in Utils/AMDKernelCodeTInfo.h, whether
+// MCKernelCodeT declares an MCExpr member with the record's name
+// (HasMember##name::RESULT). Unlike the name tables, this table has no leading
+// placeholder entry, so it is indexed directly by field index.
+static ArrayRef<bool> hasMCExprVersionTable() {
+ static bool const Table[] = {
+#define RECORD(name, altName, print, parse) (HasMember##name::RESULT)
+#include "Utils/AMDKernelCodeTInfo.h"
+#undef RECORD
+ };
+ return ArrayRef(Table);
+}
+
+// Builds a table with one entry per RECORD in Utils/AMDKernelCodeTInfo.h. Each
+// entry refers to the MCExpr member of \p C that carries the record's name, or
+// to the shared GetMember##name::Phony pointer when MCKernelCodeT has no such
+// member.
+//
+// The table is rebuilt and returned by value on every call. It must not be a
+// function-local static: a static table is initialized only during the first
+// call and would keep referring to the members of whichever MCKernelCodeT was
+// passed first, so later calls with a different object would read and write
+// the wrong (possibly already destroyed) object.
+static auto getMCExprIndexTable(MCKernelCodeT &C) {
+  return std::array{
+#define RECORD(name, altName, print, parse) std::ref(GetMember##name::Get(C))
+#include "Utils/AMDKernelCodeTInfo.h"
+#undef RECORD
+  };
+}
+
+// Builds the name -> table position lookup used for field parsing. Both the
+// primary and the alternative spelling at position I map to I. insert() keeps
+// an existing entry, so the first position registered for a spelling wins.
+static StringMap<int> createIndexMap(const ArrayRef<StringRef> &names,
+                                     const ArrayRef<StringRef> &altNames) {
+  assert(names.size() == altNames.size());
+
+  StringMap<int> IndexByName;
+  const unsigned NumEntries = names.size();
+  for (unsigned Pos = 0; Pos != NumEntries; ++Pos) {
+    IndexByName.insert(std::pair(names[Pos], Pos));
+    IndexByName.insert(std::pair(altNames[Pos], Pos));
+  }
+  return IndexByName;
+}
+
+// Maps a field name (primary or alternative spelling) to its field index.
+// The lookup map is built once on first use. Unknown names look up as 0, the
+// position of the "not found" placeholder, and therefore yield -1.
+static int get_amd_kernel_code_t_FieldIndex(StringRef name) {
+  static const StringMap<int> PositionByName = createIndexMap(
+      get_amd_kernel_code_t_FldNames(), get_amd_kernel_code_t_FldAltNames());
+  const int TablePosition = PositionByName.lookup(name);
+  return TablePosition - 1; // returns -1 if not found
+}
+
+// Splits a complement ("clear") mask into the position and the right-aligned
+// mask of the field it clears.
+//
+// \param Value complement mask, i.e. all bits set except those of the field.
+// \returns {Shift, Mask} where Shift is the index of the field's lowest bit
+// and Mask is ~Value shifted right by Shift. Returns {0, 0} when \p Value
+// clears no bits at all; without that guard the scan for the lowest set bit
+// would never terminate.
+static constexpr std::pair<unsigned, unsigned> getShiftMask(unsigned Value) {
+  unsigned Mask = ~Value;
+  if (Mask == 0)
+    return std::make_pair(0u, 0u);
+
+  unsigned Shift = 0;
+  // Drop trailing zero bits until the field's lowest bit sits at bit 0.
+  for (; !(Mask & 1); ++Shift)
+    Mask >>= 1;
+
+  return std::make_pair(Shift, Mask);
+}
+
+// Moves a right-aligned field value into its position inside a register:
+// returns (Val & Mask) << Shift as an MCExpr. A zero Mask or Shift skips the
+// corresponding operation instead of emitting a no-op expression.
+static const MCExpr *MaskShiftSet(const MCExpr *Val, uint32_t Mask,
+                                  uint32_t Shift, MCContext &Ctx) {
+  if (Mask != 0)
+    Val = MCBinaryExpr::createAnd(Val, MCConstantExpr::create(Mask, Ctx), Ctx);
+  if (Shift != 0)
+    Val = MCBinaryExpr::createShl(Val, MCConstantExpr::create(Shift, Ctx), Ctx);
+  return Val;
+}
+
+// Extracts a field from a register expression: returns (Val >> Shift) & Mask
+// as an MCExpr, using a logical shift. A zero Shift or Mask skips the
+// corresponding operation instead of emitting a no-op expression.
+static const MCExpr *MaskShiftGet(const MCExpr *Val, uint32_t Mask,
+                                  uint32_t Shift, MCContext &Ctx) {
+  if (Shift != 0)
+    Val =
+        MCBinaryExpr::createLShr(Val, MCConstantExpr::create(Shift, Ctx), Ctx);
+  if (Mask != 0)
+    Val = MCBinaryExpr::createAnd(Val, MCConstantExpr::create(Mask, Ctx), Ctx);
+  return Val;
+}
+
+// Prints "<Name> = <value>" for the amd_kernel_code_t field selected by \p ptr,
+// with the value converted to int. The MCContext parameter is not used.
+template <typename T, T amd_kernel_code_t::*ptr>
+static void printField(StringRef Name, const MCKernelCodeT &C, raw_ostream &OS,
+                       MCContext & /*Ctx*/) {
+  const T &Field = C.KernelCode.*ptr;
+  OS << Name << " = " << (int)Field;
+}
+
+// Prints "<Name> = <value>" for a bit field of \p width bits starting at bit
+// \p shift inside the amd_kernel_code_t field selected by \p ptr. The
+// MCContext parameter is not used.
+template <typename T, T amd_kernel_code_t::*ptr, int shift, int width = 1>
+static void printBitField(StringRef Name, const MCKernelCodeT &C,
+                          raw_ostream &OS, MCContext & /*Ctx*/) {
+  const auto FieldMask = (static_cast<T>(1) << width) - 1;
+  const auto FieldBits = (C.KernelCode.*ptr >> shift) & FieldMask;
+  OS << Name << " = " << (int)FieldBits;
+}
+
+// Signature shared by all field printers: (field name, kernel code, output
+// stream, MC context).
+using PrintFx = void (*)(StringRef, const MCKernelCodeT &, raw_ostream &,
+ MCContext &);
+
+// Returns the table of field printers: the `print` column of every RECORD in
+// Utils/AMDKernelCodeTInfo.h, indexed by field index.
+//
+// COMPPGM1/COMPPGM2 forward to COMPPGM with the C_/S_ accessor macros of the
+// respective register and with 0 or 32 as last argument. COMPPGM itself is not
+// defined in this file (presumably in AMDKernelCodeTInfo.h - confirm).
+//
+// PRINTCOMP expands to a printer for a field stored inside one of the
+// compute_pgm_resource MCExprs: it derives shift and mask from the field's
+// complement mask, extracts the field from compute_pgm_resource1_registers
+// when PGMType is 0 and from compute_pgm_resource2_registers otherwise, and
+// prints the value as an integer if it evaluates to an absolute value, or as
+// an expression if it does not.
+static ArrayRef<PrintFx> getPrinterTable() {
+ static const PrintFx Table[] = {
+#define COMPPGM1(name, aname, AccMacro) \
+ COMPPGM(name, aname, C_00B848_##AccMacro, S_00B848_##AccMacro, 0)
+#define COMPPGM2(name, aname, AccMacro) \
+ COMPPGM(name, aname, C_00B84C_##AccMacro, S_00B84C_##AccMacro, 32)
+#define PRINTCOMP(Complement, PGMType) \
+ [](StringRef Name, const MCKernelCodeT &C, raw_ostream &OS, \
+ MCContext &Ctx) { \
+ OS << Name << " = "; \
+ auto [Shift, Mask] = getShiftMask(Complement); \
+ const MCExpr *Value; \
+ if (PGMType == 0) { \
+ Value = \
+ MaskShiftGet(C.compute_pgm_resource1_registers, Mask, Shift, Ctx); \
+ } else { \
+ Value = \
+ MaskShiftGet(C.compute_pgm_resource2_registers, Mask, Shift, Ctx); \
+ } \
+ int64_t Val; \
+ if (Value->evaluateAsAbsolute(Val)) \
+ OS << Val; \
+ else \
+ Value->print(OS, Ctx.getAsmInfo()); \
+ }
+#define RECORD(name, altName, print, parse) print
+#include "Utils/AMDKernelCodeTInfo.h"
+#undef RECORD
+ };
+ return ArrayRef(Table);
+}
+
+// Parses "= <absolute expression>" and stores the result in \p Value.
+// Returns true on success. On failure a diagnostic is written to \p Err and
+// false is returned.
+static bool expectAbsExpression(MCAsmParser &MCParser, int64_t &Value,
+                                raw_ostream &Err) {
+  MCAsmLexer &Lexer = MCParser.getLexer();
+  if (!Lexer.is(AsmToken::Equal)) {
+    Err << "expected '='";
+    return false;
+  }
+  Lexer.Lex(); // consume '='
+
+  const bool Failed = MCParser.parseAbsoluteExpression(Value);
+  if (Failed)
+    Err << "integer absolute expression expected";
+  return !Failed;
+}
+
+// Parses "= <absolute expression>" and stores the value, converted to T, in
+// the amd_kernel_code_t field selected by \p ptr. Returns false, leaving the
+// field untouched, when parsing fails.
+template <typename T, T amd_kernel_code_t::*ptr>
+static bool parseField(MCKernelCodeT &C, MCAsmParser &MCParser,
+                       raw_ostream &Err) {
+  int64_t Parsed = 0;
+  const bool Ok = expectAbsExpression(MCParser, Parsed, Err);
+  if (Ok)
+    C.KernelCode.*ptr = (T)Parsed;
+  return Ok;
+}
+
+// Parses "= <absolute expression>" and stores the value in the bit field of
+// \p width bits starting at bit \p shift inside the amd_kernel_code_t field
+// selected by \p ptr. Bits of the value that do not fit the field are dropped
+// and the other bits of the containing field are preserved. Returns false,
+// leaving the field untouched, when parsing fails.
+template <typename T, T amd_kernel_code_t::*ptr, int shift, int width = 1>
+static bool parseBitField(MCKernelCodeT &C, MCAsmParser &MCParser,
+                          raw_ostream &Err) {
+  int64_t Parsed = 0;
+  if (!expectAbsExpression(MCParser, Parsed, Err))
+    return false;
+
+  const uint64_t FieldMask = ((UINT64_C(1) << width) - 1) << shift;
+  T &Storage = C.KernelCode.*ptr;
+  Storage &= (T)~FieldMask;                      // clear the old field value
+  Storage |= (T)((Parsed << shift) & FieldMask); // insert the new one
+  return true;
+}
+
+// Parses "= <expression>" and stores the resulting MCExpr in \p Value.
+// Returns true on success. On failure a diagnostic is written to \p Err and
+// false is returned.
+static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value,
+                      raw_ostream &Err) {
+  MCAsmLexer &Lexer = MCParser.getLexer();
+  if (!Lexer.is(AsmToken::Equal)) {
+    Err << "expected '='";
+    return false;
+  }
+  Lexer.Lex(); // consume '='
+
+  const bool Failed = MCParser.parseExpression(Value);
+  if (Failed)
+    Err << "Could not parse expression";
+  return !Failed;
+}
+
+// Signature shared by all field parsers: (kernel code to update, parser,
+// stream receiving the error text). Parsers return true on success.
+using ParseFx = bool (*)(MCKernelCodeT &, MCAsmParser &, raw_ostream &);
+
+// Returns the table of field parsers: the `parse` column of every RECORD in
+// Utils/AMDKernelCodeTInfo.h, indexed by field index.
+//
+// COMPPGM1/COMPPGM2 forward to COMPPGM with the G_/C_ accessor macros of the
+// respective register and with 0 or 32 as last argument.
+// NOTE(review): both macros, defined with different bodies in
+// getPrinterTable, are redefined here without a visible #undef; presumably
+// AMDKernelCodeTInfo.h undefines them at its end - confirm.
+//
+// PARSECOMP expands to a parser for a field stored inside one of the
+// compute_pgm_resource MCExprs: it parses "= <expression>", masks and shifts
+// the value into the field's position, clears the field in
+// compute_pgm_resource1_registers (PGMType == 0) or
+// compute_pgm_resource2_registers (otherwise) by ANDing with the complement
+// mask, and ORs the new value in.
+static ArrayRef<ParseFx> getParserTable() {
+ static const ParseFx Table[] = {
+#define COMPPGM1(name, aname, AccMacro) \
+ COMPPGM(name, aname, G_00B848_##AccMacro, C_00B848_##AccMacro, 0)
+#define COMPPGM2(name, aname, AccMacro) \
+ COMPPGM(name, aname, G_00B84C_##AccMacro, C_00B84C_##AccMacro, 32)
+#define PARSECOMP(Complement, PGMType) \
+ [](MCKernelCodeT &C, MCAsmParser &MCParser, raw_ostream &Err) -> bool { \
+ MCContext &Ctx = MCParser.getContext(); \
+ const MCExpr *Value; \
+ if (!parseExpr(MCParser, Value, Err)) \
+ return false; \
+ auto [Shift, Mask] = getShiftMask(Complement); \
+ Value = MaskShiftSet(Value, Mask, Shift, Ctx); \
+ const MCExpr *Compl = MCConstantExpr::create(Complement, Ctx); \
+ if (PGMType == 0) { \
+ C.compute_pgm_resource1_registers = MCBinaryExpr::createAnd( \
+ C.compute_pgm_resource1_registers, Compl, Ctx); \
+ C.compute_pgm_resource1_registers = MCBinaryExpr::createOr( \
+ C.compute_pgm_resource1_registers, Value, Ctx); \
+ } else { \
+ C.compute_pgm_resource2_registers = MCBinaryExpr::createAnd( \
+ C.compute_pgm_resource2_registers, Compl, Ctx); \
+ C.compute_pgm_resource2_registers = MCBinaryExpr::createOr( \
+ C.compute_pgm_resource2_registers, Value, Ctx); \
+ } \
+ return true; \
+ }
+#define RECORD(name, altName, print, parse) parse
+#include "Utils/AMDKernelCodeTInfo.h"
+#undef RECORD
+ };
+ return ArrayRef(Table);
+}
+
+// Prints the field with index \p FldIndex through its entry in the printer
+// table. Fields without a printer produce no output. The name table carries a
+// leading placeholder, hence the + 1 when looking up the field's name.
+static void printAmdKernelCodeField(const MCKernelCodeT &C, int FldIndex,
+                                    raw_ostream &OS, MCContext &Ctx) {
+  if (PrintFx Printer = getPrinterTable()[FldIndex]) {
+    const StringRef FieldName = get_amd_kernel_code_t_FldNames()[FldIndex + 1];
+    Printer(FieldName, C, OS, Ctx);
+  }
+}
+
+// Resets this object to the subtarget's defaults: KernelCode is filled in by
+// initDefaultAMDKernelCodeT, the two compute_pgm_resource MCExprs are seeded
+// with the low and high 32 bits of KernelCode.compute_pgm_resource_registers,
+// and all remaining MCExpr members become the constant 0.
+void MCKernelCodeT::initDefault(const MCSubtargetInfo *STI, MCContext &Ctx) {
+  AMDGPU::initDefaultAMDKernelCodeT(KernelCode, STI);
+
+  const MCExpr *Zero = MCConstantExpr::create(0, Ctx);
+
+  const auto PgmRsrc = KernelCode.compute_pgm_resource_registers;
+  compute_pgm_resource1_registers =
+      MCConstantExpr::create(PgmRsrc & 0xFFFFFFFF, Ctx);
+  compute_pgm_resource2_registers =
+      MCConstantExpr::create((PgmRsrc >> 32) & 0xffffffff, Ctx);
+
+  is_dynamic_callstack = Zero;
+  wavefront_sgpr_count = Zero;
+  workitem_vgpr_count = Zero;
+  workitem_private_segment_byte_size = Zero;
+}
+
+// Checks the compute_pgm_resource1 bits against what the subtarget supports
+// and reports the first violation through \p Ctx (without a source location).
+//
+// When compute_pgm_resource1_registers is null, the low 32 bits of
+// KernelCode.compute_pgm_resource_registers are checked instead, mirroring
+// the fallback EmitKernelCodeT uses when emitting the register. Nothing is
+// checked when the expression cannot be evaluated to an absolute value yet.
+void MCKernelCodeT::validate(const MCSubtargetInfo *STI, MCContext &Ctx) {
+  int64_t Value;
+  if (compute_pgm_resource1_registers == nullptr) {
+    // A default-constructed MCKernelCodeT has no MCExpr attached; do not
+    // dereference the null pointer.
+    Value = KernelCode.compute_pgm_resource_registers & 0xFFFFFFFF;
+  } else if (!compute_pgm_resource1_registers->evaluateAsAbsolute(Value)) {
+    return;
+  }
+
+  const bool IsGFX12Plus = AMDGPU::isGFX12Plus(*STI);
+  const bool IsGFX10Plus = AMDGPU::isGFX10Plus(*STI);
+
+  const struct {
+    bool Violated;
+    const char *Message;
+  } Checks[] = {
+      {G_00B848_DX10_CLAMP(Value) && IsGFX12Plus,
+       "enable_dx10_clamp=1 is not allowed on GFX12+"},
+      {G_00B848_IEEE_MODE(Value) && IsGFX12Plus,
+       "enable_ieee_mode=1 is not allowed on GFX12+"},
+      {G_00B848_WGP_MODE(Value) && !IsGFX10Plus,
+       "enable_wgp_mode=1 is only allowed on GFX10+"},
+      {G_00B848_MEM_ORDERED(Value) && !IsGFX10Plus,
+       "enable_mem_ordered=1 is only allowed on GFX10+"},
+      {G_00B848_FWD_PROGRESS(Value) && !IsGFX10Plus,
+       "enable_fwd_progress=1 is only allowed on GFX10+"},
+  };
+
+  // Only the first violated constraint is reported.
+  for (const auto &Check : Checks) {
+    if (Check.Violated) {
+      Ctx.reportError({}, Check.Message);
+      return;
+    }
+  }
+}
+
+// Returns a reference to the MCExpr slot that getMCExprIndexTable associates
+// with field index \p Index. No bounds checking is performed here.
+const MCExpr *&MCKernelCodeT::getMCExprForIndex(int Index) {
+  return getMCExprIndexTable(*this)[Index].get();
+}
+
+// Parses the value of the field named \p ID and stores it in this object.
+// Fields that have an MCExpr member are parsed as a general expression; all
+// other fields go through their entry in the parser table. Returns true on
+// success. Returns false for unknown field names, for fields without a parser
+// and when the value cannot be parsed.
+bool MCKernelCodeT::ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser,
+                                     raw_ostream &Err) {
+  const int FieldIdx = get_amd_kernel_code_t_FieldIndex(ID);
+  if (FieldIdx < 0) {
+    Err << "unexpected amd_kernel_code_t field name " << ID;
+    return false;
+  }
+
+  if (!hasMCExprVersionTable()[FieldIdx]) {
+    const ParseFx FieldParser = getParserTable()[FieldIdx];
+    return FieldParser && FieldParser(*this, MCParser, Err);
+  }
+
+  const MCExpr *Parsed;
+  if (!parseExpr(MCParser, Parsed, Err))
+    return false;
+  getMCExprForIndex(FieldIdx) = Parsed;
+  return true;
+}
+
+// Writes the textual form of all fields to \p OS, one line per field, each
+// line prefixed with \p tab. Fields with an MCExpr member are printed as an
+// integer when the expression evaluates to an absolute value and as an
+// expression otherwise; all other fields use the printer table.
+void MCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, const char *tab,
+                                    MCContext &Ctx) {
+  const ArrayRef<bool> HasExprMember = hasMCExprVersionTable();
+  const int NumFields = HasExprMember.size();
+
+  for (int Field = 0; Field != NumFields; ++Field) {
+    OS << tab;
+
+    if (!HasExprMember[Field]) {
+      printAmdKernelCodeField(*this, Field, OS, Ctx);
+      OS << '\n';
+      continue;
+    }
+
+    // The name table carries a leading placeholder entry, hence the + 1.
+    OS << get_amd_kernel_code_t_FldNames()[Field + 1] << " = ";
+    const MCExpr *Expr = getMCExprForIndex(Field);
+    int64_t Folded;
+    if (Expr->evaluateAsAbsolute(Folded))
+      OS << Folded;
+    else
+      Expr->print(OS, Ctx.getAsmInfo());
+    OS << '\n';
+  }
+}
+
+// Emits the binary amd_kernel_code_t image to \p OS, field by field in layout
+// order. Fields that have an MCExpr member are emitted from that expression
+// when it is set and from the plain KernelCode value otherwise.
+void MCKernelCodeT::EmitKernelCodeT(MCStreamer &OS, MCContext &Ctx) {
+  // Emits Expr when it is set, and the plain integer fallback otherwise.
+  const auto EmitExprOrInt = [&OS](const MCExpr *Expr, uint64_t Fallback,
+                                   unsigned Size) {
+    if (Expr != nullptr)
+      OS.emitValue(Expr, Size);
+    else
+      OS.emitIntValue(Fallback, Size);
+  };
+
+  const amd_kernel_code_t &KC = KernelCode;
+
+  OS.emitIntValue(KC.amd_kernel_code_version_major, /*Size=*/4);
+  OS.emitIntValue(KC.amd_kernel_code_version_minor, /*Size=*/4);
+  OS.emitIntValue(KC.amd_machine_kind, /*Size=*/2);
+  OS.emitIntValue(KC.amd_machine_version_major, /*Size=*/2);
+  OS.emitIntValue(KC.amd_machine_version_minor, /*Size=*/2);
+  OS.emitIntValue(KC.amd_machine_version_stepping, /*Size=*/2);
+  OS.emitIntValue(KC.kernel_code_entry_byte_offset, /*Size=*/8);
+  OS.emitIntValue(KC.kernel_code_prefetch_byte_offset, /*Size=*/8);
+  OS.emitIntValue(KC.kernel_code_prefetch_byte_size, /*Size=*/8);
+  OS.emitIntValue(KC.reserved0, /*Size=*/8);
+
+  // compute_pgm_resource_registers is emitted as two 32-bit halves.
+  const auto PgmRsrc = KC.compute_pgm_resource_registers;
+  EmitExprOrInt(compute_pgm_resource1_registers, PgmRsrc & 0xFFFFFFFF,
+                /*Size=*/4);
+  EmitExprOrInt(compute_pgm_resource2_registers, (PgmRsrc >> 32) & 0xFFFFFFFF,
+                /*Size=*/4);
+
+  // code_properties: the dynamic callstack bit may only be known as an
+  // expression, in which case it is ORed into the constant part.
+  if (is_dynamic_callstack == nullptr) {
+    OS.emitIntValue(KC.code_properties, /*Size=*/4);
+  } else {
+    const MCExpr *StaticProps = MCConstantExpr::create(KC.code_properties, Ctx);
+    const MCExpr *DynCallStack = MaskShiftSet(
+        is_dynamic_callstack,
+        (1 << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH) - 1,
+        AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT, Ctx);
+    OS.emitValue(MCBinaryExpr::createOr(StaticProps, DynCallStack, Ctx),
+                 /*Size=*/4);
+  }
+
+  EmitExprOrInt(workitem_private_segment_byte_size,
+                KC.workitem_private_segment_byte_size, /*Size=*/4);
+
+  OS.emitIntValue(KC.workgroup_group_segment_byte_size, /*Size=*/4);
+  OS.emitIntValue(KC.gds_segment_byte_size, /*Size=*/4);
+  OS.emitIntValue(KC.kernarg_segment_byte_size, /*Size=*/8);
+  OS.emitIntValue(KC.workgroup_fbarrier_count, /*Size=*/4);
+
+  EmitExprOrInt(wavefront_sgpr_count, KC.wavefront_sgpr_count, /*Size=*/2);
+  EmitExprOrInt(workitem_vgpr_count, KC.workitem_vgpr_count, /*Size=*/2);
+
+  OS.emitIntValue(KC.reserved_vgpr_first, /*Size=*/2);
+  OS.emitIntValue(KC.reserved_vgpr_count, /*Size=*/2);
+  OS.emitIntValue(KC.reserved_sgpr_first, /*Size=*/2);
+  OS.emitIntValue(KC.reserved_sgpr_count, /*Size=*/2);
+  OS.emitIntValue(KC.debug_wavefront_private_segment_offset_sgpr,
+                  /*Size=*/2);
+  OS.emitIntValue(KC.debug_private_segment_buffer_sgpr, /*Size=*/2);
+  OS.emitIntValue(KC.kernarg_segment_alignment, /*Size=*/1);
+  OS.emitIntValue(KC.group_segment_alignment, /*Size=*/1);
+  OS.emitIntValue(KC.private_segment_alignment, /*Size=*/1);
+  OS.emitIntValue(KC.wavefront_size, /*Size=*/1);
+
+  OS.emitIntValue(KC.call_convention, /*Size=*/4);
+  OS.emitBytes(StringRef((const char *)KC.reserved3, /*Size=*/12));
+  OS.emitIntValue(KC.runtime_loader_kernel_symbol, /*Size=*/8);
+  OS.emitBytes(
+      StringRef((const char *)KC.control_directives, /*Size=*/16 * 8));
+}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h
new file mode 100644
index 0000000000000..66c5d1107487b
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h
@@ -0,0 +1,59 @@
+//===--- AMDGPUMCKernelCodeT.h --------------------------------*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// MC layer struct for amd_kernel_code_t, provides MCExpr functionality where
+/// required.
+///
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCKERNELCODET_H
+#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCKERNELCODET_H
+
+#include "AMDKernelCodeT.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/ArrayRef.h"
+
+namespace llvm {
+class MCAsmParser;
+class MCContext;
+class MCExpr;
+class MCStreamer;
+class MCSubtargetInfo;
+class raw_ostream;
+namespace AMDGPU {
+
+struct MCKernelCodeT {
+ MCKernelCodeT() = default;
+
+ amd_kernel_code_t KernelCode;
+ const MCExpr *compute_pgm_resource1_registers = nullptr;
+ const MCExpr *compute_pgm_resource2_registers = nullptr;
+
+ // Duplicated fields, but uses MCExpr instead.
+ // Name has to be the same as the ones used in AMDKernelCodeTInfo.h.
+ const MCExpr *is_dynamic_callstack = nullptr;
+ const MCExpr *wavefront_sgpr_count = nullptr;
+ const MCExpr *workitem_vgpr_count = nullptr;
+ const MCExpr *workitem_private_segment_byte_size = nullptr;
+
+ void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx);
+ void validate(const MCSubtargetInfo *STI, MCContext &Ctx);
+
+ const MCExpr *&getMCExprForIndex(int Index);
+
+ bool ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser, raw_ostream &Err);
+ void EmitKernelCodeT(raw_ostream &OS, const char *tab, MCContext &Ctx);
+ void EmitKernelCodeT(MCStreamer &OS, MCContext &Ctx);
+};
+
+} // end namespace AMDGPU
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCKERNELCODET_H
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 02fe7be06280e..e58409e8b0cbc 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -11,9 +11,9 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUTargetStreamer.h"
+#include "AMDGPUMCKernelCodeT.h"
#include "AMDGPUMCKernelDescriptor.h"
#include "AMDGPUPTNote.h"
-#include "AMDKernelCodeT.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
@@ -240,10 +240,9 @@ void AMDGPUTargetAsmStreamer::EmitDirectiveAMDHSACodeObjectVersion(
OS << "\t.amdhsa_code_object_version " << COV << '\n';
}
-void
-AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
+void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(MCKernelCodeT &Header) {
OS << "\t.amd_kernel_code_t\n";
- dumpAmdKernelCode(&Header, OS, "\t\t");
+ Header.EmitKernelCodeT(OS, "\t\t", getContext());
OS << "\t.end_amd_kernel_code_t\n";
}
@@ -789,12 +788,10 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {}
-void
-AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
-
+void AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(MCKernelCodeT &Header) {
MCStreamer &OS = getStreamer();
OS.pushSection();
- OS.emitBytes(StringRef((const char*)&Header, sizeof(Header)));
+ Header.EmitKernelCodeT(OS, getContext());
OS.popSection();
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index 706897a5dc1f4..ea5d1d379f785 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -13,8 +13,6 @@
#include "Utils/AMDGPUPALMetadata.h"
#include "llvm/MC/MCStreamer.h"
-struct amd_kernel_code_t;
-
namespace llvm {
class MCELFStreamer;
@@ -23,6 +21,7 @@ class formatted_raw_ostream;
namespace AMDGPU {
+struct MCKernelCodeT;
struct MCKernelDescriptor;
namespace HSAMD {
struct Metadata;
@@ -54,7 +53,7 @@ class AMDGPUTargetStreamer : public MCTargetStreamer {
CodeObjectVersion = COV;
}
- virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header){};
+ virtual void EmitAMDKernelCodeT(AMDGPU::MCKernelCodeT &Header){};
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type){};
@@ -130,7 +129,7 @@ class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer {
void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV) override;
- void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
+ void EmitAMDKernelCodeT(AMDGPU::MCKernelCodeT &Header) override;
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
@@ -186,7 +185,7 @@ class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
void EmitDirectiveAMDGCNTarget() override;
- void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
+ void EmitAMDKernelCodeT(AMDGPU::MCKernelCodeT &Header) override;
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
index 14a02b6d8e368..5ff44ee70afa6 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
@@ -8,6 +8,7 @@ add_llvm_component_library(LLVMAMDGPUDesc
AMDGPUMCExpr.cpp
AMDGPUMCTargetDesc.cpp
AMDGPUTargetStreamer.cpp
+ AMDGPUMCKernelCodeT.cpp
AMDGPUMCKernelDescriptor.cpp
R600InstPrinter.cpp
R600MCCodeEmitter.cpp
@@ -20,6 +21,7 @@ add_llvm_component_library(LLVMAMDGPUDesc
CodeGenTypes
Core
MC
+ MCParser
Support
TargetParser
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 6d0e0b3f4de2c..1e9bfc77ab923 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -1111,7 +1111,7 @@ enum Type { TRAP = -2, WORKGROUP = -1 };
#define C_00B84C_LDS_SIZE 0xFF007FFF
#define S_00B84C_EXCP_EN(x) (((x) & 0x7F) << 24)
#define G_00B84C_EXCP_EN(x) (((x) >> 24) & 0x7F)
-#define C_00B84C_EXCP_EN
+#define C_00B84C_EXCP_EN 0x80FFFFFF
#define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC
#define R_0286D0_SPI_PS_INPUT_ADDR 0x0286D0
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
index 95ad3f35d18f8..2a9fa804bc898 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
@@ -15,31 +15,44 @@
#define QNAME(name) amd_kernel_code_t::name
#define FLD_T(name) decltype(QNAME(name)), &QNAME(name)
+#ifndef FIELD2
#define FIELD2(sname, aname, name) \
RECORD(sname, aname, printField<FLD_T(name)>, parseField<FLD_T(name)>)
+#endif
+#ifndef FIELD
#define FIELD(name) FIELD2(name, name, name)
+#endif
-
+#ifndef PRINTCODEPROP
#define PRINTCODEPROP(name) \
printBitField<FLD_T(code_properties),\
AMD_CODE_PROPERTY_##name##_SHIFT,\
AMD_CODE_PROPERTY_##name##_WIDTH>
+#endif
+#ifndef PARSECODEPROP
#define PARSECODEPROP(name) \
parseBitField<FLD_T(code_properties),\
AMD_CODE_PROPERTY_##name##_SHIFT,\
AMD_CODE_PROPERTY_##name##_WIDTH>
+#endif
+#ifndef CODEPROP
#define CODEPROP(name, shift) \
RECORD(name, name, PRINTCODEPROP(shift), PARSECODEPROP(shift))
+#endif
// have to define these lambdas because of Set/GetMacro
+#ifndef PRINTCOMP
#define PRINTCOMP(GetMacro, Shift) \
[](StringRef Name, const amd_kernel_code_t &C, raw_ostream &OS) { \
printName(OS, Name) << \
(int)GetMacro(C.compute_pgm_resource_registers >> Shift); \
}
+#endif
+
+#ifndef PARSECOMP
#define PARSECOMP(SetMacro, Shift) \
[](amd_kernel_code_t &C, MCAsmParser &MCParser, raw_ostream &Err) { \
int64_t Value = 0; \
@@ -49,15 +62,22 @@
C.compute_pgm_resource_registers |= SetMacro(Value) << Shift; \
return true; \
}
+#endif
+#ifndef COMPPGM
#define COMPPGM(name, aname, GetMacro, SetMacro, Shift) \
RECORD(name, aname, PRINTCOMP(GetMacro, Shift), PARSECOMP(SetMacro, Shift))
+#endif
+#ifndef COMPPGM1
#define COMPPGM1(name, aname, AccMacro) \
COMPPGM(name, aname, G_00B848_##AccMacro, S_00B848_##AccMacro, 0)
+#endif
+#ifndef COMPPGM2
#define COMPPGM2(name, aname, AccMacro) \
COMPPGM(name, aname, G_00B84C_##AccMacro, S_00B84C_##AccMacro, 32)
+#endif
///////////////////////////////////////////////////////////////////////////////
// Begin of the table
@@ -149,7 +169,7 @@ FIELD(runtime_loader_kernel_symbol)
#undef PARSECODEPROP
#undef CODEPROP
#undef PRINTCOMP
-#undef PAPSECOMP
+#undef PARSECOMP
#undef COMPPGM
#undef COMPPGM1
#undef COMPPGM2
diff --git a/llvm/test/MC/AMDGPU/amd_kernel_code_t.s b/llvm/test/MC/AMDGPU/amd_kernel_code_t.s
new file mode 100644
index 0000000000000..052ec0bfabb84
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/amd_kernel_code_t.s
@@ -0,0 +1,171 @@
+; RUN: llvm-mc -triple=amdgcn-mesa-mesa3d -mcpu=gfx900 -filetype=asm < %s | FileCheck --check-prefix=ASM %s
+; RUN: llvm-mc -triple=amdgcn-mesa-mesa3d -mcpu=gfx900 -filetype=obj < %s > %t
+; RUN: llvm-objdump -s %t | FileCheck --check-prefix=OBJDUMP %s
+
+; OBJDUMP: Contents of section .known_is_dynamic_callstack:
+; OBJDUMP: 0030 00000000 00000000 00001000 00000000
+
+; OBJDUMP: Contents of section .known_wavefront_sgpr_count:
+; OBJDUMP: 0050 00000000 01000000 00000000 00000000
+
+; OBJDUMP: Contents of section .known_workitem_vgpr_count:
+; OBJDUMP: 0050 00000000 00000100 00000000 00000000
+
+; OBJDUMP: Contents of section .known_workitem_private_segment_byte_size:
+; OBJDUMP: 0030 00000000 00000000 00000000 01000000
+
+; OBJDUMP: Contents of section .known_granulated_workitem_vgpr_count:
+; OBJDUMP: 0030 01000000 00000000 00000000 00000000
+
+; OBJDUMP: Contents of section .known_enable_sgpr_workgroup_id_x:
+; OBJDUMP: 0030 00000000 80000000 00000000 00000000
+
+; OBJDUMP: Contents of section .unknown_is_dynamic_callstack:
+; OBJDUMP: 0030 00000000 00000000 00001000 00000000
+
+; OBJDUMP: Contents of section .unknown_wavefront_sgpr_count:
+; OBJDUMP: 0050 00000000 01000000 00000000 00000000
+
+; OBJDUMP: Contents of section .unknown_workitem_vgpr_count:
+; OBJDUMP: 0050 00000000 00000100 00000000 00000000
+
+; OBJDUMP: Contents of section .unknown_workitem_private_segment_byte_size:
+; OBJDUMP: 0030 00000000 00000000 00000000 01000000
+
+; OBJDUMP: Contents of section .unknown_granulated_workitem_vgpr_count:
+; OBJDUMP: 0030 01000000 00000000 00000000 00000000
+
+; OBJDUMP: Contents of section .unknown_enable_sgpr_workgroup_id_x:
+; OBJDUMP: 0030 00000000 80000000 00000000 00000000
+
+.set known, 1
+
+; ASM-LABEL: known_is_dynamic_callstack:
+; ASM: is_dynamic_callstack = 1
+.section .known_is_dynamic_callstack
+known_is_dynamic_callstack:
+ .amd_kernel_code_t
+ is_dynamic_callstack = known
+ .end_amd_kernel_code_t
+ s_endpgm
+
+; ASM-LABEL: known_wavefront_sgpr_count:
+; ASM: wavefront_sgpr_count = 1
+.section .known_wavefront_sgpr_count
+known_wavefront_sgpr_count:
+ .amd_kernel_code_t
+ wavefront_sgpr_count = known
+ .end_amd_kernel_code_t
+ s_endpgm
+
+; ASM-LABEL: known_workitem_vgpr_count:
+; ASM: workitem_vgpr_count = 1
+.section .known_workitem_vgpr_count
+known_workitem_vgpr_count:
+ .amd_kernel_code_t
+ workitem_vgpr_count = known
+ .end_amd_kernel_code_t
+ s_endpgm
+
+; ASM-LABEL: known_workitem_private_segment_byte_size:
+; ASM: workitem_private_segment_byte_size = 1
+.section .known_workitem_private_segment_byte_size
+known_workitem_private_segment_byte_size:
+ .amd_kernel_code_t
+ workitem_private_segment_byte_size = known
+ .end_amd_kernel_code_t
+ s_endpgm
+
+; ASM-LABEL: known_granulated_workitem_vgpr_count:
+; ASM: granulated_workitem_vgpr_count = 1
+.section .known_granulated_workitem_vgpr_count
+known_granulated_workitem_vgpr_count:
+ .amd_kernel_code_t
+ granulated_workitem_vgpr_count = known
+ .end_amd_kernel_code_t
+ s_endpgm
+
+; ASM-LABEL: known_enable_sgpr_workgroup_id_x:
+; ASM: enable_sgpr_workgroup_id_x = 1
+.section .known_enable_sgpr_workgroup_id_x
+known_enable_sgpr_workgroup_id_x:
+ .amd_kernel_code_t
+ enable_sgpr_workgroup_id_x = known
+ .end_amd_kernel_code_t
+ s_endpgm
+
+; ASM-LABEL: unknown_is_dynamic_callstack:
+; ASM: is_dynamic_callstack = unknown
+.section .unknown_is_dynamic_callstack
+unknown_is_dynamic_callstack:
+ .amd_kernel_code_t
+ is_dynamic_callstack = unknown
+ .end_amd_kernel_code_t
+ s_endpgm
+
+; ASM-LABEL: unknown_wavefront_sgpr_count:
+; ASM: wavefront_sgpr_count = unknown
+.section .unknown_wavefront_sgpr_count
+unknown_wavefront_sgpr_count:
+ .amd_kernel_code_t
+ wavefront_sgpr_count = unknown
+ .end_amd_kernel_code_t
+ s_endpgm
+
+; ASM-LABEL: unknown_workitem_vgpr_count:
+; ASM: workitem_vgpr_count = unknown
+.section .unknown_workitem_vgpr_count
+unknown_workitem_vgpr_count:
+ .amd_kernel_code_t
+ workitem_vgpr_count = unknown
+ .end_amd_kernel_code_t
+ s_endpgm
+
+; ASM-LABEL: unknown_workitem_private_segment_byte_size:
+; ASM: workitem_private_segment_byte_size = unknown
+.section .unknown_workitem_private_segment_byte_size
+unknown_workitem_private_segment_byte_size:
+ .amd_kernel_code_t
+ workitem_private_segment_byte_size = unknown
+ .end_amd_kernel_code_t
+ s_endpgm
+
+; ASM-LABEL: unknown_granulated_workitem_vgpr_count:
+; ASM: granulated_workitem_vgpr_count = ((0&4294967232)|(unknown&63))&63
+; ASM: granulated_wavefront_sgpr_count = (((0&4294967232)|(unknown&63))>>6)&15
+; ASM: priority = (((0&4294967232)|(unknown&63))>>10)&3
+; ASM: float_mode = (((0&4294967232)|(unknown&63))>>12)&255
+; ASM: priv = (((0&4294967232)|(unknown&63))>>20)&1
+; ASM: enable_dx10_clamp = (((0&4294967232)|(unknown&63))>>21)&1
+; ASM: debug_mode = (((0&4294967232)|(unknown&63))>>22)&1
+; ASM: enable_ieee_mode = (((0&4294967232)|(unknown&63))>>23)&1
+; ASM: enable_wgp_mode = (((0&4294967232)|(unknown&63))>>29)&1
+; ASM: enable_mem_ordered = (((0&4294967232)|(unknown&63))>>30)&1
+; ASM: enable_fwd_progress = (((0&4294967232)|(unknown&63))>>31)&1
+.section .unknown_granulated_workitem_vgpr_count
+unknown_granulated_workitem_vgpr_count:
+ .amd_kernel_code_t
+ granulated_workitem_vgpr_count = unknown
+ .end_amd_kernel_code_t
+ s_endpgm
+
+; ASM-LABEL: unknown_enable_sgpr_workgroup_id_x:
+; ASM: enable_sgpr_private_segment_wave_byte_offset = ((0&4294967167)|((unknown&1)<<7))&1
+; ASM: user_sgpr_count = (((0&4294967167)|((unknown&1)<<7))>>1)&31
+; ASM: enable_trap_handler = (((0&4294967167)|((unknown&1)<<7))>>6)&1
+; ASM: enable_sgpr_workgroup_id_x = (((0&4294967167)|((unknown&1)<<7))>>7)&1
+; ASM: enable_sgpr_workgroup_id_y = (((0&4294967167)|((unknown&1)<<7))>>8)&1
+; ASM: enable_sgpr_workgroup_id_z = (((0&4294967167)|((unknown&1)<<7))>>9)&1
+; ASM: enable_sgpr_workgroup_info = (((0&4294967167)|((unknown&1)<<7))>>10)&1
+; ASM: enable_vgpr_workitem_id = (((0&4294967167)|((unknown&1)<<7))>>11)&3
+; ASM: enable_exception_msb = (((0&4294967167)|((unknown&1)<<7))>>13)&3
+; ASM: granulated_lds_size = (((0&4294967167)|((unknown&1)<<7))>>15)&511
+; ASM: enable_exception = (((0&4294967167)|((unknown&1)<<7))>>24)&127
+.section .unknown_enable_sgpr_workgroup_id_x
+unknown_enable_sgpr_workgroup_id_x:
+ .amd_kernel_code_t
+ enable_sgpr_workgroup_id_x = unknown
+ .end_amd_kernel_code_t
+ s_endpgm
+
+.set unknown, 1
>From ccdeaf31408e00cd9de1f352102a517368bec733 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Thu, 9 May 2024 17:05:15 +0100
Subject: [PATCH 2/5] Formatting, rename struct, minor feedback changes
---
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 27 +++++---
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h | 5 +-
.../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 7 +-
.../MCTargetDesc/AMDGPUMCKernelCodeT.cpp | 65 +++++++++----------
.../AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h | 6 +-
.../MCTargetDesc/AMDGPUTargetStreamer.cpp | 4 +-
.../MCTargetDesc/AMDGPUTargetStreamer.h | 8 +--
7 files changed, 65 insertions(+), 57 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 8343d3d83d22e..f4a5bd10d5579 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -205,7 +205,7 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
if (STM.isMesaKernel(F) &&
(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
- MCKernelCodeT KernelCode;
+ AMDGPUMCKernelCodeT KernelCode;
getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);
KernelCode.validate(&STM, MF->getContext());
getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
@@ -1321,7 +1321,7 @@ static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
}
}
-void AMDGPUAsmPrinter::getAmdKernelCode(MCKernelCodeT &Out,
+void AMDGPUAsmPrinter::getAmdKernelCode(AMDGPUMCKernelCodeT &Out,
const SIProgramInfo &CurrentProgramInfo,
const MachineFunction &MF) const {
const Function &F = MF.getFunction();
@@ -1341,7 +1341,8 @@ void AMDGPUAsmPrinter::getAmdKernelCode(MCKernelCodeT &Out,
Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
{
- const MCExpr *Shift = MCConstantExpr::create(AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT, Ctx);
+ const MCExpr *Shift = MCConstantExpr::create(
+ AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT, Ctx);
Out.is_dynamic_callstack = MCBinaryExpr::createShl(
CurrentProgramInfo.DynamicCallStack, Shift, Ctx);
}
@@ -1353,32 +1354,37 @@ void AMDGPUAsmPrinter::getAmdKernelCode(MCKernelCodeT &Out,
const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI->getUserSGPRInfo();
if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
Out.KernelCode.code_properties |=
- AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
}
if (UserSGPRInfo.hasDispatchPtr())
- Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+ Out.KernelCode.code_properties |=
+ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
if (UserSGPRInfo.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5)
Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
if (UserSGPRInfo.hasKernargSegmentPtr())
- Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
+ Out.KernelCode.code_properties |=
+ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
if (UserSGPRInfo.hasDispatchID())
Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
if (UserSGPRInfo.hasFlatScratchInit())
- Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
+ Out.KernelCode.code_properties |=
+ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
if (UserSGPRInfo.hasDispatchPtr())
- Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+ Out.KernelCode.code_properties |=
+ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
if (STM.isXNACKEnabled())
Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
Align MaxKernArgAlign;
- Out.KernelCode.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
+ Out.KernelCode.kernarg_segment_byte_size =
+ STM.getKernArgSegmentSize(F, MaxKernArgAlign);
Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
@@ -1387,7 +1393,8 @@ void AMDGPUAsmPrinter::getAmdKernelCode(MCKernelCodeT &Out,
// kernarg_segment_alignment is specified as log of the alignment.
// The minimum alignment is 16.
// FIXME: The metadata treats the minimum as 4?
- Out.KernelCode.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign));
+ Out.KernelCode.kernarg_segment_alignment =
+ Log2(std::max(Align(16), MaxKernArgAlign));
}
bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index c5abbd3c8c084..87156f27fc6c5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -27,7 +27,7 @@ class MCOperand;
namespace AMDGPU {
struct MCKernelDescriptor;
-struct MCKernelCodeT;
+struct AMDGPUMCKernelCodeT;
namespace HSAMD {
class MetadataStreamer;
}
@@ -49,7 +49,8 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
uint64_t getFunctionCodeSize(const MachineFunction &MF) const;
void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF);
- void getAmdKernelCode(AMDGPU::MCKernelCodeT &Out, const SIProgramInfo &KernelInfo,
+ void getAmdKernelCode(AMDGPU::AMDGPUMCKernelCodeT &Out,
+ const SIProgramInfo &KernelInfo,
const MachineFunction &MF) const;
/// Emit register usage information so that the GPU driver
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index b8bdf816a9932..7de2d52a8337b 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1341,7 +1341,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool ParseDirectiveAMDGCNTarget();
bool ParseDirectiveAMDHSACodeObjectVersion();
bool ParseDirectiveAMDHSAKernel();
- bool ParseAMDKernelCodeTValue(StringRef ID, MCKernelCodeT &Header);
+ bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
bool ParseDirectiveAMDKernelCodeT();
// TODO: Possibly make subtargetHasRegister const.
bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
@@ -5873,7 +5873,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
return false;
}
-bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, MCKernelCodeT &C) {
+bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
+ AMDGPUMCKernelCodeT &C) {
// max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
// assembly for backwards compatibility.
if (ID == "max_scratch_backing_memory_byte_size") {
@@ -5917,7 +5918,7 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, MCKernelCodeT &C) {
}
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
- MCKernelCodeT KernelCode;
+ AMDGPUMCKernelCodeT KernelCode;
KernelCode.initDefault(&getSTI(), getContext());
while (true) {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
index 7c081d98dadbf..72d3bfb48b94a 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
@@ -22,11 +22,11 @@
using namespace llvm;
using namespace llvm::AMDGPU;
-// Generates the following for MCKernelCodeT struct members:
+// Generates the following for AMDGPUMCKernelCodeT struct members:
// - HasMemberXXXXX class
-// A check to see if MCKernelCodeT has a specific member so it can determine
-// which of the original amd_kernel_code_t members are duplicated (if the
-// names don't match, the table driven strategy won't work).
+// A check to see if AMDGPUMCKernelCodeT has a specific member so it can
+// determine which of the original amd_kernel_code_t members are duplicated
+// (if the names don't match, the table driven strategy won't work).
// - GetMemberXXXXX class
// A retrieval helper for said member (of type const MCExpr *&). Will return
// a `Phony` const MCExpr * initialized to nullptr to preserve reference
@@ -37,7 +37,8 @@ using namespace llvm::AMDGPU;
struct KnownWithMember { \
int member; \
}; \
- class AmbiguousDerived : public MCKernelCodeT, public KnownWithMember {}; \
+ class AmbiguousDerived : public AMDGPUMCKernelCodeT, \
+ public KnownWithMember {}; \
template <typename U> \
static constexpr std::false_type Test(decltype(U::member) *); \
template <typename U> static constexpr std::true_type Test(...); \
@@ -143,7 +144,7 @@ GEN_HAS_MEMBER(runtime_loader_kernel_symbol)
static ArrayRef<StringRef> get_amd_kernel_code_t_FldNames() {
static StringRef const Table[] = {
- "", // not found placeholder
+ "", // not found placeholder
#define RECORD(name, altName, print, parse) #name
#include "Utils/AMDKernelCodeTInfo.h"
#undef RECORD
@@ -153,7 +154,7 @@ static ArrayRef<StringRef> get_amd_kernel_code_t_FldNames() {
static ArrayRef<StringRef> get_amd_kernel_code_t_FldAltNames() {
static StringRef const Table[] = {
- "", // not found placeholder
+ "", // not found placeholder
#define RECORD(name, altName, print, parse) #altName
#include "Utils/AMDKernelCodeTInfo.h"
#undef RECORD
@@ -171,7 +172,7 @@ static ArrayRef<bool> hasMCExprVersionTable() {
}
static ArrayRef<std::reference_wrapper<const MCExpr *>>
-getMCExprIndexTable(MCKernelCodeT &C) {
+getMCExprIndexTable(AMDGPUMCKernelCodeT &C) {
static std::reference_wrapper<const MCExpr *> Table[] = {
#define RECORD(name, altName, print, parse) GetMember##name::Get(C)
#include "Utils/AMDKernelCodeTInfo.h"
@@ -235,23 +236,19 @@ static const MCExpr *MaskShiftGet(const MCExpr *Val, uint32_t Mask,
}
template <typename T, T amd_kernel_code_t::*ptr>
-static void printField(StringRef Name, const MCKernelCodeT &C, raw_ostream &OS,
- MCContext &Ctx) {
- (void)Ctx;
- OS << Name << " = ";
- OS << (int)(C.KernelCode.*ptr);
+static void printField(StringRef Name, const AMDGPUMCKernelCodeT &C,
+ raw_ostream &OS, MCContext &) {
+ OS << Name << " = " << (int)(C.KernelCode.*ptr);
}
template <typename T, T amd_kernel_code_t::*ptr, int shift, int width = 1>
-static void printBitField(StringRef Name, const MCKernelCodeT &C,
- raw_ostream &OS, MCContext &Ctx) {
- (void)Ctx;
+static void printBitField(StringRef Name, const AMDGPUMCKernelCodeT &C,
+ raw_ostream &OS, MCContext &) {
const auto Mask = (static_cast<T>(1) << width) - 1;
- OS << Name << " = ";
- OS << (int)((C.KernelCode.*ptr >> shift) & Mask);
+ OS << Name << " = " << (int)((C.KernelCode.*ptr >> shift) & Mask);
}
-using PrintFx = void (*)(StringRef, const MCKernelCodeT &, raw_ostream &,
+using PrintFx = void (*)(StringRef, const AMDGPUMCKernelCodeT &, raw_ostream &,
MCContext &);
static ArrayRef<PrintFx> getPrinterTable() {
@@ -261,7 +258,7 @@ static ArrayRef<PrintFx> getPrinterTable() {
#define COMPPGM2(name, aname, AccMacro) \
COMPPGM(name, aname, C_00B84C_##AccMacro, S_00B84C_##AccMacro, 32)
#define PRINTCOMP(Complement, PGMType) \
- [](StringRef Name, const MCKernelCodeT &C, raw_ostream &OS, \
+ [](StringRef Name, const AMDGPUMCKernelCodeT &C, raw_ostream &OS, \
MCContext &Ctx) { \
OS << Name << " = "; \
auto [Shift, Mask] = getShiftMask(Complement); \
@@ -303,7 +300,7 @@ static bool expectAbsExpression(MCAsmParser &MCParser, int64_t &Value,
}
template <typename T, T amd_kernel_code_t::*ptr>
-static bool parseField(MCKernelCodeT &C, MCAsmParser &MCParser,
+static bool parseField(AMDGPUMCKernelCodeT &C, MCAsmParser &MCParser,
raw_ostream &Err) {
int64_t Value = 0;
if (!expectAbsExpression(MCParser, Value, Err))
@@ -313,7 +310,7 @@ static bool parseField(MCKernelCodeT &C, MCAsmParser &MCParser,
}
template <typename T, T amd_kernel_code_t::*ptr, int shift, int width = 1>
-static bool parseBitField(MCKernelCodeT &C, MCAsmParser &MCParser,
+static bool parseBitField(AMDGPUMCKernelCodeT &C, MCAsmParser &MCParser,
raw_ostream &Err) {
int64_t Value = 0;
if (!expectAbsExpression(MCParser, Value, Err))
@@ -339,7 +336,7 @@ static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value,
return true;
}
-using ParseFx = bool (*)(MCKernelCodeT &, MCAsmParser &, raw_ostream &);
+using ParseFx = bool (*)(AMDGPUMCKernelCodeT &, MCAsmParser &, raw_ostream &);
static ArrayRef<ParseFx> getParserTable() {
static const ParseFx Table[] = {
@@ -348,7 +345,8 @@ static ArrayRef<ParseFx> getParserTable() {
#define COMPPGM2(name, aname, AccMacro) \
COMPPGM(name, aname, G_00B84C_##AccMacro, C_00B84C_##AccMacro, 32)
#define PARSECOMP(Complement, PGMType) \
- [](MCKernelCodeT &C, MCAsmParser &MCParser, raw_ostream &Err) -> bool { \
+ [](AMDGPUMCKernelCodeT &C, MCAsmParser &MCParser, \
+ raw_ostream &Err) -> bool { \
MCContext &Ctx = MCParser.getContext(); \
const MCExpr *Value; \
if (!parseExpr(MCParser, Value, Err)) \
@@ -376,14 +374,15 @@ static ArrayRef<ParseFx> getParserTable() {
return ArrayRef(Table);
}
-static void printAmdKernelCodeField(const MCKernelCodeT &C, int FldIndex,
+static void printAmdKernelCodeField(const AMDGPUMCKernelCodeT &C, int FldIndex,
raw_ostream &OS, MCContext &Ctx) {
auto Printer = getPrinterTable()[FldIndex];
if (Printer)
Printer(get_amd_kernel_code_t_FldNames()[FldIndex + 1], C, OS, Ctx);
}
-void MCKernelCodeT::initDefault(const MCSubtargetInfo *STI, MCContext &Ctx) {
+void AMDGPUMCKernelCodeT::initDefault(const MCSubtargetInfo *STI,
+ MCContext &Ctx) {
AMDGPU::initDefaultAMDKernelCodeT(KernelCode, STI);
const MCExpr *ZeroExpr = MCConstantExpr::create(0, Ctx);
compute_pgm_resource1_registers = MCConstantExpr::create(
@@ -396,7 +395,7 @@ void MCKernelCodeT::initDefault(const MCSubtargetInfo *STI, MCContext &Ctx) {
workitem_private_segment_byte_size = ZeroExpr;
}
-void MCKernelCodeT::validate(const MCSubtargetInfo *STI, MCContext &Ctx) {
+void AMDGPUMCKernelCodeT::validate(const MCSubtargetInfo *STI, MCContext &Ctx) {
int64_t Value;
if (!compute_pgm_resource1_registers->evaluateAsAbsolute(Value))
return;
@@ -427,13 +426,13 @@ void MCKernelCodeT::validate(const MCSubtargetInfo *STI, MCContext &Ctx) {
}
}
-const MCExpr *&MCKernelCodeT::getMCExprForIndex(int Index) {
+const MCExpr *&AMDGPUMCKernelCodeT::getMCExprForIndex(int Index) {
auto IndexTable = getMCExprIndexTable(*this);
return IndexTable[Index].get();
}
-bool MCKernelCodeT::ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser,
- raw_ostream &Err) {
+bool AMDGPUMCKernelCodeT::ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser,
+ raw_ostream &Err) {
const int Idx = get_amd_kernel_code_t_FieldIndex(ID);
if (Idx < 0) {
Err << "unexpected amd_kernel_code_t field name " << ID;
@@ -451,8 +450,8 @@ bool MCKernelCodeT::ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser,
return Parser ? Parser(*this, MCParser, Err) : false;
}
-void MCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, const char *tab,
- MCContext &Ctx) {
+void AMDGPUMCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, const char *tab,
+ MCContext &Ctx) {
const int Size = hasMCExprVersionTable().size();
for (int i = 0; i < Size; ++i) {
OS << tab;
@@ -471,7 +470,7 @@ void MCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, const char *tab,
}
}
-void MCKernelCodeT::EmitKernelCodeT(MCStreamer &OS, MCContext &Ctx) {
+void AMDGPUMCKernelCodeT::EmitKernelCodeT(MCStreamer &OS, MCContext &Ctx) {
OS.emitIntValue(KernelCode.amd_kernel_code_version_major, /*Size=*/4);
OS.emitIntValue(KernelCode.amd_kernel_code_version_minor, /*Size=*/4);
OS.emitIntValue(KernelCode.amd_machine_kind, /*Size=*/2);
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h
index 66c5d1107487b..278b0827f07a4 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h
@@ -17,8 +17,8 @@
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCKERNELCODET_H
#include "AMDKernelCodeT.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
namespace llvm {
class MCAsmParser;
@@ -29,8 +29,8 @@ class MCSubtargetInfo;
class raw_ostream;
namespace AMDGPU {
-struct MCKernelCodeT {
- MCKernelCodeT() = default;
+struct AMDGPUMCKernelCodeT {
+ AMDGPUMCKernelCodeT() = default;
amd_kernel_code_t KernelCode;
const MCExpr *compute_pgm_resource1_registers = nullptr;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index e58409e8b0cbc..efbbe93ceb3c2 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -240,7 +240,7 @@ void AMDGPUTargetAsmStreamer::EmitDirectiveAMDHSACodeObjectVersion(
OS << "\t.amdhsa_code_object_version " << COV << '\n';
}
-void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(MCKernelCodeT &Header) {
+void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
OS << "\t.amd_kernel_code_t\n";
Header.EmitKernelCodeT(OS, "\t\t", getContext());
OS << "\t.end_amd_kernel_code_t\n";
@@ -788,7 +788,7 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {}
-void AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(MCKernelCodeT &Header) {
+void AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
MCStreamer &OS = getStreamer();
OS.pushSection();
Header.EmitKernelCodeT(OS, getContext());
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index ea5d1d379f785..399e0a7dfd098 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -21,7 +21,7 @@ class formatted_raw_ostream;
namespace AMDGPU {
-struct MCKernelCodeT;
+struct AMDGPUMCKernelCodeT;
struct MCKernelDescriptor;
namespace HSAMD {
struct Metadata;
@@ -53,7 +53,7 @@ class AMDGPUTargetStreamer : public MCTargetStreamer {
CodeObjectVersion = COV;
}
- virtual void EmitAMDKernelCodeT(AMDGPU::MCKernelCodeT &Header){};
+ virtual void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header){};
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type){};
@@ -129,7 +129,7 @@ class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer {
void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV) override;
- void EmitAMDKernelCodeT(AMDGPU::MCKernelCodeT &Header) override;
+ void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override;
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
@@ -185,7 +185,7 @@ class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
void EmitDirectiveAMDGCNTarget() override;
- void EmitAMDKernelCodeT(AMDGPU::MCKernelCodeT &Header) override;
+ void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override;
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
>From 7878171c9051d23246feaa8bbfcd083d23ff49f7 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Thu, 9 May 2024 17:33:50 +0100
Subject: [PATCH 3/5] Formatting that gets undone by local clang-format
---
llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index 399e0a7dfd098..e5c90060cb5d0 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -53,7 +53,7 @@ class AMDGPUTargetStreamer : public MCTargetStreamer {
CodeObjectVersion = COV;
}
- virtual void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header){};
+ virtual void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) {};
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type){};
>From 0761ef524557e5ffe7711f95e264475967952fae Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Mon, 13 May 2024 14:40:09 +0100
Subject: [PATCH 4/5] Feedback, remove AMDKernelCodeTUtils files as they're not
used
---
.../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 1 -
.../MCTargetDesc/AMDGPUMCKernelCodeT.cpp | 17 +-
.../AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h | 2 +-
.../MCTargetDesc/AMDGPUTargetStreamer.cpp | 3 +-
.../AMDGPU/Utils/AMDKernelCodeTUtils.cpp | 177 ------------------
.../Target/AMDGPU/Utils/AMDKernelCodeTUtils.h | 35 ----
llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt | 1 -
7 files changed, 10 insertions(+), 226 deletions(-)
delete mode 100644 llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
delete mode 100644 llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 7de2d52a8337b..1e81efd0b64bd 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -18,7 +18,6 @@
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
index 72d3bfb48b94a..8e1d8e6154d21 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
@@ -142,8 +142,8 @@ GEN_HAS_MEMBER(wavefront_size)
GEN_HAS_MEMBER(call_convention)
GEN_HAS_MEMBER(runtime_loader_kernel_symbol)
-static ArrayRef<StringRef> get_amd_kernel_code_t_FldNames() {
- static StringRef const Table[] = {
+static ArrayRef<StringLiteral> get_amd_kernel_code_t_FldNames() {
+ static constexpr StringLiteral const Table[] = {
"", // not found placeholder
#define RECORD(name, altName, print, parse) #name
#include "Utils/AMDKernelCodeTInfo.h"
@@ -152,8 +152,8 @@ static ArrayRef<StringRef> get_amd_kernel_code_t_FldNames() {
return ArrayRef(Table);
}
-static ArrayRef<StringRef> get_amd_kernel_code_t_FldAltNames() {
- static StringRef const Table[] = {
+static ArrayRef<StringLiteral> get_amd_kernel_code_t_FldAltNames() {
+ static constexpr StringLiteral const Table[] = {
"", // not found placeholder
#define RECORD(name, altName, print, parse) #altName
#include "Utils/AMDKernelCodeTInfo.h"
@@ -181,8 +181,8 @@ getMCExprIndexTable(AMDGPUMCKernelCodeT &C) {
return ArrayRef(Table);
}
-static StringMap<int> createIndexMap(const ArrayRef<StringRef> &names,
- const ArrayRef<StringRef> &altNames) {
+static StringMap<int> createIndexMap(ArrayRef<StringLiteral> names,
+ ArrayRef<StringLiteral> altNames) {
StringMap<int> map;
assert(names.size() == altNames.size());
for (unsigned i = 0; i < names.size(); ++i) {
@@ -450,11 +450,10 @@ bool AMDGPUMCKernelCodeT::ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser,
return Parser ? Parser(*this, MCParser, Err) : false;
}
-void AMDGPUMCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, const char *tab,
- MCContext &Ctx) {
+void AMDGPUMCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, MCContext &Ctx) {
const int Size = hasMCExprVersionTable().size();
for (int i = 0; i < Size; ++i) {
- OS << tab;
+ OS << "\t\t";
if (hasMCExprVersionTable()[i]) {
OS << get_amd_kernel_code_t_FldNames()[i + 1] << " = ";
int64_t Val;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h
index 278b0827f07a4..f1f61f130f944 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h
@@ -49,7 +49,7 @@ struct AMDGPUMCKernelCodeT {
const MCExpr *&getMCExprForIndex(int Index);
bool ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser, raw_ostream &Err);
- void EmitKernelCodeT(raw_ostream &OS, const char *tab, MCContext &Ctx);
+ void EmitKernelCodeT(raw_ostream &OS, MCContext &Ctx);
void EmitKernelCodeT(MCStreamer &OS, MCContext &Ctx);
};
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index efbbe93ceb3c2..f2a93c5ebaf18 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -15,7 +15,6 @@
#include "AMDGPUMCKernelDescriptor.h"
#include "AMDGPUPTNote.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAssembler.h"
@@ -242,7 +241,7 @@ void AMDGPUTargetAsmStreamer::EmitDirectiveAMDHSACodeObjectVersion(
void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
OS << "\t.amd_kernel_code_t\n";
- Header.EmitKernelCodeT(OS, "\t\t", getContext());
+ Header.EmitKernelCodeT(OS, getContext());
OS << "\t.end_amd_kernel_code_t\n";
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
deleted file mode 100644
index 6bbc8c3157187..0000000000000
--- a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
+++ /dev/null
@@ -1,177 +0,0 @@
-//===- AMDKernelCodeTUtils.cpp --------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file - utility functions to parse/print amd_kernel_code_t structure
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDKernelCodeTUtils.h"
-#include "AMDKernelCodeT.h"
-#include "SIDefines.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCAsmParser.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-static ArrayRef<StringRef> get_amd_kernel_code_t_FldNames() {
- static StringRef const Table[] = {
- "", // not found placeholder
-#define RECORD(name, altName, print, parse) #name
-#include "AMDKernelCodeTInfo.h"
-#undef RECORD
- };
- return ArrayRef(Table);
-}
-
-static ArrayRef<StringRef> get_amd_kernel_code_t_FldAltNames() {
- static StringRef const Table[] = {
- "", // not found placeholder
-#define RECORD(name, altName, print, parse) #altName
-#include "AMDKernelCodeTInfo.h"
-#undef RECORD
- };
- return ArrayRef(Table);
-}
-
-static StringMap<int> createIndexMap(const ArrayRef<StringRef> &names,
- const ArrayRef<StringRef> &altNames) {
- StringMap<int> map;
- assert(names.size() == altNames.size());
- for (unsigned i = 0; i < names.size(); ++i) {
- map.insert(std::pair(names[i], i));
- map.insert(std::pair(altNames[i], i));
- }
- return map;
-}
-
-static int get_amd_kernel_code_t_FieldIndex(StringRef name) {
- static const auto map = createIndexMap(get_amd_kernel_code_t_FldNames(),
- get_amd_kernel_code_t_FldAltNames());
- return map.lookup(name) - 1; // returns -1 if not found
-}
-
-static StringRef get_amd_kernel_code_t_FieldName(int index) {
- return get_amd_kernel_code_t_FldNames()[index + 1];
-}
-
-// Field printing
-
-static raw_ostream &printName(raw_ostream &OS, StringRef Name) {
- return OS << Name << " = ";
-}
-
-template <typename T, T amd_kernel_code_t::*ptr>
-static void printField(StringRef Name, const amd_kernel_code_t &C,
- raw_ostream &OS) {
- printName(OS, Name) << (int)(C.*ptr);
-}
-
-template <typename T, T amd_kernel_code_t::*ptr, int shift, int width = 1>
-static void printBitField(StringRef Name, const amd_kernel_code_t &c,
- raw_ostream &OS) {
- const auto Mask = (static_cast<T>(1) << width) - 1;
- printName(OS, Name) << (int)((c.*ptr >> shift) & Mask);
-}
-
-using PrintFx = void(*)(StringRef, const amd_kernel_code_t &, raw_ostream &);
-
-static ArrayRef<PrintFx> getPrinterTable() {
- static const PrintFx Table[] = {
-#define RECORD(name, altName, print, parse) print
-#include "AMDKernelCodeTInfo.h"
-#undef RECORD
- };
- return ArrayRef(Table);
-}
-
-void llvm::printAmdKernelCodeField(const amd_kernel_code_t &C,
- int FldIndex,
- raw_ostream &OS) {
- auto Printer = getPrinterTable()[FldIndex];
- if (Printer)
- Printer(get_amd_kernel_code_t_FieldName(FldIndex), C, OS);
-}
-
-void llvm::dumpAmdKernelCode(const amd_kernel_code_t *C,
- raw_ostream &OS,
- const char *tab) {
- const int Size = getPrinterTable().size();
- for (int i = 0; i < Size; ++i) {
- OS << tab;
- printAmdKernelCodeField(*C, i, OS);
- OS << '\n';
- }
-}
-
-// Field parsing
-
-static bool expectAbsExpression(MCAsmParser &MCParser, int64_t &Value, raw_ostream& Err) {
-
- if (MCParser.getLexer().isNot(AsmToken::Equal)) {
- Err << "expected '='";
- return false;
- }
- MCParser.getLexer().Lex();
-
- if (MCParser.parseAbsoluteExpression(Value)) {
- Err << "integer absolute expression expected";
- return false;
- }
- return true;
-}
-
-template <typename T, T amd_kernel_code_t::*ptr>
-static bool parseField(amd_kernel_code_t &C, MCAsmParser &MCParser,
- raw_ostream &Err) {
- int64_t Value = 0;
- if (!expectAbsExpression(MCParser, Value, Err))
- return false;
- C.*ptr = (T)Value;
- return true;
-}
-
-template <typename T, T amd_kernel_code_t::*ptr, int shift, int width = 1>
-static bool parseBitField(amd_kernel_code_t &C, MCAsmParser &MCParser,
- raw_ostream &Err) {
- int64_t Value = 0;
- if (!expectAbsExpression(MCParser, Value, Err))
- return false;
- const uint64_t Mask = ((UINT64_C(1) << width) - 1) << shift;
- C.*ptr &= (T)~Mask;
- C.*ptr |= (T)((Value << shift) & Mask);
- return true;
-}
-
-using ParseFx = bool(*)(amd_kernel_code_t &, MCAsmParser &MCParser,
- raw_ostream &Err);
-
-static ArrayRef<ParseFx> getParserTable() {
- static const ParseFx Table[] = {
-#define RECORD(name, altName, print, parse) parse
-#include "AMDKernelCodeTInfo.h"
-#undef RECORD
- };
- return ArrayRef(Table);
-}
-
-bool llvm::parseAmdKernelCodeField(StringRef ID,
- MCAsmParser &MCParser,
- amd_kernel_code_t &C,
- raw_ostream &Err) {
- const int Idx = get_amd_kernel_code_t_FieldIndex(ID);
- if (Idx < 0) {
- Err << "unexpected amd_kernel_code_t field name " << ID;
- return false;
- }
- auto Parser = getParserTable()[Idx];
- return Parser ? Parser(C, MCParser, Err) : false;
-}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
deleted file mode 100644
index 41d0e0d745e5e..0000000000000
--- a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
+++ /dev/null
@@ -1,35 +0,0 @@
-//===- AMDGPUKernelCodeTUtils.h - helpers for amd_kernel_code_t -*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file AMDKernelCodeTUtils.h
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDKERNELCODETUTILS_H
-#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDKERNELCODETUTILS_H
-
-struct amd_kernel_code_t;
-
-namespace llvm {
-
-class MCAsmParser;
-class raw_ostream;
-class StringRef;
-
-void printAmdKernelCodeField(const amd_kernel_code_t &C, int FldIndex,
- raw_ostream &OS);
-
-void dumpAmdKernelCode(const amd_kernel_code_t *C, raw_ostream &OS,
- const char *tab);
-
-bool parseAmdKernelCodeField(StringRef ID, MCAsmParser &Parser,
- amd_kernel_code_t &C, raw_ostream &Err);
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDKERNELCODETUTILS_H
diff --git a/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt b/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt
index 19d3b690b1315..814d584c149f0 100644
--- a/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt
@@ -3,7 +3,6 @@ add_llvm_component_library(LLVMAMDGPUUtils
AMDGPUBaseInfo.cpp
AMDGPUMemoryUtils.cpp
AMDGPUPALMetadata.cpp
- AMDKernelCodeTUtils.cpp
LINK_COMPONENTS
Analysis
>From 68961589a74346101490158fe5cc6b8122e71910 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <Janek.vanOirschot at amd.com>
Date: Fri, 17 May 2024 06:58:34 -0700
Subject: [PATCH 5/5] Feedback, move destination files of AMDGPUMCKernelCodeT
struct, overwrite all members (and change table-driven strategy to conform
to that)
---
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 38 ++---
.../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 9 +-
.../MCTargetDesc/AMDGPUTargetStreamer.cpp | 2 +-
.../Target/AMDGPU/MCTargetDesc/CMakeLists.txt | 2 -
.../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 69 +++++---
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 3 +-
.../Target/AMDGPU/Utils/AMDKernelCodeTInfo.h | 11 +-
.../AMDKernelCodeTUtils.cpp} | 160 +++++++++++-------
.../AMDKernelCodeTUtils.h} | 38 ++++-
llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt | 2 +
10 files changed, 212 insertions(+), 122 deletions(-)
rename llvm/lib/Target/AMDGPU/{MCTargetDesc/AMDGPUMCKernelCodeT.cpp => Utils/AMDKernelCodeTUtils.cpp} (78%)
rename llvm/lib/Target/AMDGPU/{MCTargetDesc/AMDGPUMCKernelCodeT.h => Utils/AMDKernelCodeTUtils.h} (57%)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index f4a5bd10d5579..6ff150a0570e1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -22,13 +22,13 @@
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
-#include "MCTargetDesc/AMDGPUMCKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "R600AsmPrinter.h"
#include "SIMachineFunctionInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -1332,13 +1332,13 @@ void AMDGPUAsmPrinter::getAmdKernelCode(AMDGPUMCKernelCodeT &Out,
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
MCContext &Ctx = MF.getContext();
- AMDGPU::initDefaultAMDKernelCodeT(Out.KernelCode, &STM);
+ AMDGPU::initDefaultAMDKernelCodeT(Out, &STM);
Out.compute_pgm_resource1_registers =
CurrentProgramInfo.getComputePGMRSrc1(STM, Ctx);
Out.compute_pgm_resource2_registers =
CurrentProgramInfo.getComputePGMRSrc2(Ctx);
- Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
+ Out.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
{
const MCExpr *Shift = MCConstantExpr::create(
@@ -1347,54 +1347,46 @@ void AMDGPUAsmPrinter::getAmdKernelCode(AMDGPUMCKernelCodeT &Out,
CurrentProgramInfo.DynamicCallStack, Shift, Ctx);
}
- AMD_HSA_BITS_SET(Out.KernelCode.code_properties,
- AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
+ AMD_HSA_BITS_SET(Out.code_properties, AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
getElementByteSizeValue(STM.getMaxPrivateElementSize(true)));
const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI->getUserSGPRInfo();
if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
- Out.KernelCode.code_properties |=
- AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
+ Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
}
if (UserSGPRInfo.hasDispatchPtr())
- Out.KernelCode.code_properties |=
- AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+ Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
if (UserSGPRInfo.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5)
- Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
+ Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
if (UserSGPRInfo.hasKernargSegmentPtr())
- Out.KernelCode.code_properties |=
- AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
+ Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
if (UserSGPRInfo.hasDispatchID())
- Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
+ Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
if (UserSGPRInfo.hasFlatScratchInit())
- Out.KernelCode.code_properties |=
- AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
+ Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
if (UserSGPRInfo.hasDispatchPtr())
- Out.KernelCode.code_properties |=
- AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+ Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
if (STM.isXNACKEnabled())
- Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
+ Out.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
Align MaxKernArgAlign;
- Out.KernelCode.kernarg_segment_byte_size =
- STM.getKernArgSegmentSize(F, MaxKernArgAlign);
+ Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
- Out.KernelCode.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
+ Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
// kernarg_segment_alignment is specified as log of the alignment.
// The minimum alignment is 16.
// FIXME: The metadata treats the minimum as 4?
- Out.KernelCode.kernarg_segment_alignment =
- Log2(std::max(Align(16), MaxKernArgAlign));
+ Out.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign));
}
bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 1e81efd0b64bd..8b1c1c53de30d 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -8,7 +8,6 @@
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
-#include "MCTargetDesc/AMDGPUMCKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
@@ -18,6 +17,7 @@
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
@@ -5889,8 +5889,7 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
Lex();
if (ID == "enable_wavefront_size32") {
- if (C.KernelCode.code_properties &
- AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
+ if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
if (!isGFX10Plus())
return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
@@ -5902,12 +5901,12 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
}
if (ID == "wavefront_size") {
- if (C.KernelCode.wavefront_size == 5) {
+ if (C.wavefront_size == 5) {
if (!isGFX10Plus())
return TokError("wavefront_size=5 is only allowed on GFX10+");
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
return TokError("wavefront_size=5 requires +WavefrontSize32");
- } else if (C.KernelCode.wavefront_size == 6) {
+ } else if (C.wavefront_size == 6) {
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
return TokError("wavefront_size=6 requires +WavefrontSize64");
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index f2a93c5ebaf18..00e64e3419ba0 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -11,10 +11,10 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUTargetStreamer.h"
-#include "AMDGPUMCKernelCodeT.h"
#include "AMDGPUMCKernelDescriptor.h"
#include "AMDGPUPTNote.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAssembler.h"
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
index 5ff44ee70afa6..14a02b6d8e368 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
@@ -8,7 +8,6 @@ add_llvm_component_library(LLVMAMDGPUDesc
AMDGPUMCExpr.cpp
AMDGPUMCTargetDesc.cpp
AMDGPUTargetStreamer.cpp
- AMDGPUMCKernelCodeT.cpp
AMDGPUMCKernelDescriptor.cpp
R600InstPrinter.cpp
R600MCCodeEmitter.cpp
@@ -21,7 +20,6 @@ add_llvm_component_library(LLVMAMDGPUDesc
CodeGenTypes
Core
MC
- MCParser
Support
TargetParser
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 2beaf903542bd..2ef7ed2737efe 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -11,6 +11,7 @@
#include "AMDGPUAsmUtils.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
@@ -1218,39 +1219,67 @@ unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI,
}
} // end namespace IsaInfo
-void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
+void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode,
const MCSubtargetInfo *STI) {
IsaVersion Version = getIsaVersion(STI->getCPU());
- memset(&Header, 0, sizeof(Header));
-
- Header.amd_kernel_code_version_major = 1;
- Header.amd_kernel_code_version_minor = 2;
- Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
- Header.amd_machine_version_major = Version.Major;
- Header.amd_machine_version_minor = Version.Minor;
- Header.amd_machine_version_stepping = Version.Stepping;
- Header.kernel_code_entry_byte_offset = sizeof(Header);
- Header.wavefront_size = 6;
+ KernelCode.amd_kernel_code_version_major = 0;
+ KernelCode.amd_kernel_code_version_minor = 0;
+ KernelCode.amd_machine_kind = 0;
+ KernelCode.amd_machine_version_major = 0;
+ KernelCode.amd_machine_version_minor = 0;
+ KernelCode.amd_machine_version_stepping = 0;
+ KernelCode.kernel_code_prefetch_byte_offset = 0;
+ KernelCode.kernel_code_prefetch_byte_size = 0;
+ KernelCode.reserved0 = 0;
+ KernelCode.compute_pgm_resource_registers = 0;
+ KernelCode.code_properties = 0;
+ KernelCode.workgroup_group_segment_byte_size = 0;
+ KernelCode.gds_segment_byte_size = 0;
+ KernelCode.kernarg_segment_byte_size = 0;
+ KernelCode.workgroup_fbarrier_count = 0;
+ KernelCode.reserved_vgpr_first = 0;
+ KernelCode.reserved_vgpr_count = 0;
+ KernelCode.reserved_sgpr_first = 0;
+ KernelCode.reserved_sgpr_count = 0;
+ KernelCode.debug_wavefront_private_segment_offset_sgpr = 0;
+ KernelCode.debug_private_segment_buffer_sgpr = 0;
+ KernelCode.kernarg_segment_alignment = 0;
+ KernelCode.group_segment_alignment = 0;
+ KernelCode.private_segment_alignment = 0;
+ KernelCode.call_convention = 0;
+ memset(KernelCode.reserved3, 0, sizeof(KernelCode.reserved3));
+ KernelCode.runtime_loader_kernel_symbol = 0;
+ memset(KernelCode.control_directives, 0,
+ sizeof(KernelCode.control_directives));
+
+ KernelCode.amd_kernel_code_version_major = 1;
+ KernelCode.amd_kernel_code_version_minor = 2;
+ KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
+ KernelCode.amd_machine_version_major = Version.Major;
+ KernelCode.amd_machine_version_minor = Version.Minor;
+ KernelCode.amd_machine_version_stepping = Version.Stepping;
+ KernelCode.kernel_code_entry_byte_offset = sizeof(amd_kernel_code_t);
+ KernelCode.wavefront_size = 6;
// If the code object does not support indirect functions, then the value must
// be 0xffffffff.
- Header.call_convention = -1;
+ KernelCode.call_convention = -1;
// These alignment values are specified in powers of two, so alignment =
// 2^n. The minimum alignment is 2^4 = 16.
- Header.kernarg_segment_alignment = 4;
- Header.group_segment_alignment = 4;
- Header.private_segment_alignment = 4;
+ KernelCode.kernarg_segment_alignment = 4;
+ KernelCode.group_segment_alignment = 4;
+ KernelCode.private_segment_alignment = 4;
if (Version.Major >= 10) {
if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
- Header.wavefront_size = 5;
- Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
+ KernelCode.wavefront_size = 5;
+ KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
}
- Header.compute_pgm_resource_registers |=
- S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
- S_00B848_MEM_ORDERED(1);
+ KernelCode.compute_pgm_resource_registers |=
+ S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
+ S_00B848_MEM_ORDERED(1);
}
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index fc4147df76e3e..3cfc42a7d24d5 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -37,6 +37,7 @@ class raw_ostream;
namespace AMDGPU {
+struct AMDGPUMCKernelCodeT;
struct IsaVersion;
/// Generic target versions emitted by this version of LLVM.
@@ -860,7 +861,7 @@ unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);
LLVM_READONLY
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);
-void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
+void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &Header,
const MCSubtargetInfo *STI);
bool isGroupSegment(const GlobalValue *GV);
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
index 2a9fa804bc898..75cb6cffbd51b 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
@@ -12,12 +12,16 @@
//
//===----------------------------------------------------------------------===//
-#define QNAME(name) amd_kernel_code_t::name
+#define QNAME(name) AMDGPUMCKernelCodeT::name
#define FLD_T(name) decltype(QNAME(name)), &QNAME(name)
+#ifndef PRINTFIELD
+#define PRINTFIELD(sname, aname, name) printField<FLD_T(name)>
+#endif
+
#ifndef FIELD2
-#define FIELD2(sname, aname, name) \
- RECORD(sname, aname, printField<FLD_T(name)>, parseField<FLD_T(name)>)
+#define FIELD2(sname, aname, name) \
+ RECORD(sname, aname, PRINTFIELD(sname, aname, name), parseField<FLD_T(name)>)
#endif
#ifndef FIELD
@@ -163,6 +167,7 @@ FIELD(runtime_loader_kernel_symbol)
#undef QNAME
#undef FLD_T
+#undef PRINTFIELD
#undef FIELD2
#undef FIELD
#undef PRINTCODEPROP
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
similarity index 78%
rename from llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
rename to llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
index 8e1d8e6154d21..bd815d7c8c012 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
@@ -1,12 +1,16 @@
-//===--- AMDHSAKernelCodeT.cpp --------------------------------------------===//
+//===- AMDKernelCodeTUtils.cpp --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
+//
+/// \file - utility functions to parse/print AMDGPUMCKernelCodeT structure
+//
+//===----------------------------------------------------------------------===//
-#include "AMDGPUMCKernelCodeT.h"
+#include "AMDKernelCodeTUtils.h"
#include "AMDKernelCodeT.h"
#include "SIDefines.h"
#include "Utils/AMDGPUBaseInfo.h"
@@ -17,6 +21,7 @@
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -48,18 +53,32 @@ using namespace llvm::AMDGPU;
std::is_same_v<decltype(Test<AmbiguousDerived>(nullptr)), \
std::true_type>; \
}; \
+ class IsMCExpr##member { \
+ template <typename U, \
+ typename std::enable_if_t< \
+ HasMember##member::RESULT && \
+ std::is_same_v<decltype(U::member), const MCExpr *>, \
+ U> * = nullptr> \
+ static constexpr std::true_type HasMCExprType(decltype(U::member) *); \
+ template <typename U> static constexpr std::false_type HasMCExprType(...); \
+ \
+ public: \
+ static constexpr bool RESULT = \
+ std::is_same_v<decltype(HasMCExprType<AMDGPUMCKernelCodeT>(nullptr)), \
+ std::true_type>; \
+ }; \
class GetMember##member { \
public: \
static const MCExpr *Phony; \
- template <typename U, typename std::enable_if_t<HasMember##member::RESULT, \
+ template <typename U, typename std::enable_if_t<IsMCExpr##member::RESULT, \
U> * = nullptr> \
static const MCExpr *&Get(U &C) { \
- assert(HasMember##member::RESULT && \
+ assert(IsMCExpr##member::RESULT && \
"Trying to retrieve member that does not exist."); \
return C.member; \
} \
- template <typename U, typename std::enable_if_t< \
- !HasMember##member::RESULT, U> * = nullptr> \
+ template <typename U, typename std::enable_if_t<!IsMCExpr##member::RESULT, \
+ U> * = nullptr> \
static const MCExpr *&Get(U &C) { \
return Phony; \
} \
@@ -164,7 +183,7 @@ static ArrayRef<StringLiteral> get_amd_kernel_code_t_FldAltNames() {
static ArrayRef<bool> hasMCExprVersionTable() {
static bool const Table[] = {
-#define RECORD(name, altName, print, parse) (HasMember##name::RESULT)
+#define RECORD(name, altName, print, parse) (IsMCExpr##name::RESULT)
#include "Utils/AMDKernelCodeTInfo.h"
#undef RECORD
};
@@ -235,17 +254,34 @@ static const MCExpr *MaskShiftGet(const MCExpr *Val, uint32_t Mask,
return Val;
}
-template <typename T, T amd_kernel_code_t::*ptr>
-static void printField(StringRef Name, const AMDGPUMCKernelCodeT &C,
- raw_ostream &OS, MCContext &) {
- OS << Name << " = " << (int)(C.KernelCode.*ptr);
-}
+class PrintField {
+public:
+ template <typename T, T AMDGPUMCKernelCodeT::*ptr,
+ typename std::enable_if_t<!std::is_integral_v<T>, T> * = nullptr>
+ static void printField(StringRef Name, const AMDGPUMCKernelCodeT &C,
+ raw_ostream &OS, MCContext &Ctx) {
+ OS << Name << " = ";
+ const MCExpr *Value = C.*ptr;
+ int64_t Val;
+ if (Value->evaluateAsAbsolute(Val))
+ OS << Val;
+ else
+ Value->print(OS, Ctx.getAsmInfo());
+ }
+
+ template <typename T, T AMDGPUMCKernelCodeT::*ptr,
+ typename std::enable_if_t<std::is_integral_v<T>, T> * = nullptr>
+ static void printField(StringRef Name, const AMDGPUMCKernelCodeT &C,
+ raw_ostream &OS, MCContext &) {
+ OS << Name << " = " << (int)(C.*ptr);
+ }
+};
-template <typename T, T amd_kernel_code_t::*ptr, int shift, int width = 1>
+template <typename T, T AMDGPUMCKernelCodeT::*ptr, int shift, int width = 1>
static void printBitField(StringRef Name, const AMDGPUMCKernelCodeT &C,
raw_ostream &OS, MCContext &) {
const auto Mask = (static_cast<T>(1) << width) - 1;
- OS << Name << " = " << (int)((C.KernelCode.*ptr >> shift) & Mask);
+ OS << Name << " = " << (int)((C.*ptr >> shift) & Mask);
}
using PrintFx = void (*)(StringRef, const AMDGPUMCKernelCodeT &, raw_ostream &,
@@ -257,6 +293,7 @@ static ArrayRef<PrintFx> getPrinterTable() {
COMPPGM(name, aname, C_00B848_##AccMacro, S_00B848_##AccMacro, 0)
#define COMPPGM2(name, aname, AccMacro) \
COMPPGM(name, aname, C_00B84C_##AccMacro, S_00B84C_##AccMacro, 32)
+#define PRINTFIELD(sname, aname, name) PrintField::printField<FLD_T(name)>
#define PRINTCOMP(Complement, PGMType) \
[](StringRef Name, const AMDGPUMCKernelCodeT &C, raw_ostream &OS, \
MCContext &Ctx) { \
@@ -299,25 +336,25 @@ static bool expectAbsExpression(MCAsmParser &MCParser, int64_t &Value,
return true;
}
-template <typename T, T amd_kernel_code_t::*ptr>
+template <typename T, T AMDGPUMCKernelCodeT::*ptr>
static bool parseField(AMDGPUMCKernelCodeT &C, MCAsmParser &MCParser,
raw_ostream &Err) {
int64_t Value = 0;
if (!expectAbsExpression(MCParser, Value, Err))
return false;
- C.KernelCode.*ptr = (T)Value;
+ C.*ptr = (T)Value;
return true;
}
-template <typename T, T amd_kernel_code_t::*ptr, int shift, int width = 1>
+template <typename T, T AMDGPUMCKernelCodeT::*ptr, int shift, int width = 1>
static bool parseBitField(AMDGPUMCKernelCodeT &C, MCAsmParser &MCParser,
raw_ostream &Err) {
int64_t Value = 0;
if (!expectAbsExpression(MCParser, Value, Err))
return false;
const uint64_t Mask = ((UINT64_C(1) << width) - 1) << shift;
- C.KernelCode.*ptr &= (T)~Mask;
- C.KernelCode.*ptr |= (T)((Value << shift) & Mask);
+ C.*ptr &= (T)~Mask;
+ C.*ptr |= (T)((Value << shift) & Mask);
return true;
}
@@ -383,12 +420,12 @@ static void printAmdKernelCodeField(const AMDGPUMCKernelCodeT &C, int FldIndex,
void AMDGPUMCKernelCodeT::initDefault(const MCSubtargetInfo *STI,
MCContext &Ctx) {
- AMDGPU::initDefaultAMDKernelCodeT(KernelCode, STI);
+ AMDGPU::initDefaultAMDKernelCodeT(*this, STI);
const MCExpr *ZeroExpr = MCConstantExpr::create(0, Ctx);
- compute_pgm_resource1_registers = MCConstantExpr::create(
- KernelCode.compute_pgm_resource_registers & 0xFFFFFFFF, Ctx);
- compute_pgm_resource2_registers = MCConstantExpr::create(
- (KernelCode.compute_pgm_resource_registers >> 32) & 0xffffffff, Ctx);
+ compute_pgm_resource1_registers =
+ MCConstantExpr::create(Lo_32(compute_pgm_resource_registers), Ctx);
+ compute_pgm_resource2_registers =
+ MCConstantExpr::create(Hi_32(compute_pgm_resource_registers), Ctx);
is_dynamic_callstack = ZeroExpr;
wavefront_sgpr_count = ZeroExpr;
workitem_vgpr_count = ZeroExpr;
@@ -470,33 +507,31 @@ void AMDGPUMCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, MCContext &Ctx) {
}
void AMDGPUMCKernelCodeT::EmitKernelCodeT(MCStreamer &OS, MCContext &Ctx) {
- OS.emitIntValue(KernelCode.amd_kernel_code_version_major, /*Size=*/4);
- OS.emitIntValue(KernelCode.amd_kernel_code_version_minor, /*Size=*/4);
- OS.emitIntValue(KernelCode.amd_machine_kind, /*Size=*/2);
- OS.emitIntValue(KernelCode.amd_machine_version_major, /*Size=*/2);
- OS.emitIntValue(KernelCode.amd_machine_version_minor, /*Size=*/2);
- OS.emitIntValue(KernelCode.amd_machine_version_stepping, /*Size=*/2);
- OS.emitIntValue(KernelCode.kernel_code_entry_byte_offset, /*Size=*/8);
- OS.emitIntValue(KernelCode.kernel_code_prefetch_byte_offset, /*Size=*/8);
- OS.emitIntValue(KernelCode.kernel_code_prefetch_byte_size, /*Size=*/8);
- OS.emitIntValue(KernelCode.reserved0, /*Size=*/8);
+ OS.emitIntValue(amd_kernel_code_version_major, /*Size=*/4);
+ OS.emitIntValue(amd_kernel_code_version_minor, /*Size=*/4);
+ OS.emitIntValue(amd_machine_kind, /*Size=*/2);
+ OS.emitIntValue(amd_machine_version_major, /*Size=*/2);
+ OS.emitIntValue(amd_machine_version_minor, /*Size=*/2);
+ OS.emitIntValue(amd_machine_version_stepping, /*Size=*/2);
+ OS.emitIntValue(kernel_code_entry_byte_offset, /*Size=*/8);
+ OS.emitIntValue(kernel_code_prefetch_byte_offset, /*Size=*/8);
+ OS.emitIntValue(kernel_code_prefetch_byte_size, /*Size=*/8);
+ OS.emitIntValue(reserved0, /*Size=*/8);
if (compute_pgm_resource1_registers != nullptr)
OS.emitValue(compute_pgm_resource1_registers, /*Size=*/4);
else
- OS.emitIntValue(KernelCode.compute_pgm_resource_registers & 0xFFFFFFFF,
+ OS.emitIntValue(Lo_32(compute_pgm_resource_registers),
/*Size=*/4);
if (compute_pgm_resource2_registers != nullptr)
OS.emitValue(compute_pgm_resource2_registers, /*Size=*/4);
else
- OS.emitIntValue((KernelCode.compute_pgm_resource_registers >> 32) &
- 0xFFFFFFFF,
+ OS.emitIntValue(Hi_32(compute_pgm_resource_registers),
/*Size=*/4);
if (is_dynamic_callstack != nullptr) {
- const MCExpr *CodeProps =
- MCConstantExpr::create(KernelCode.code_properties, Ctx);
+ const MCExpr *CodeProps = MCConstantExpr::create(code_properties, Ctx);
CodeProps = MCBinaryExpr::createOr(
CodeProps,
MaskShiftSet(is_dynamic_callstack,
@@ -505,43 +540,42 @@ void AMDGPUMCKernelCodeT::EmitKernelCodeT(MCStreamer &OS, MCContext &Ctx) {
Ctx);
OS.emitValue(CodeProps, /*Size=*/4);
} else
- OS.emitIntValue(KernelCode.code_properties, /*Size=*/4);
+ OS.emitIntValue(code_properties, /*Size=*/4);
if (workitem_private_segment_byte_size != nullptr)
OS.emitValue(workitem_private_segment_byte_size, /*Size=*/4);
else
- OS.emitIntValue(KernelCode.workitem_private_segment_byte_size, /*Size=*/4);
+ OS.emitIntValue(0, /*Size=*/4);
- OS.emitIntValue(KernelCode.workgroup_group_segment_byte_size, /*Size=*/4);
- OS.emitIntValue(KernelCode.gds_segment_byte_size, /*Size=*/4);
- OS.emitIntValue(KernelCode.kernarg_segment_byte_size, /*Size=*/8);
- OS.emitIntValue(KernelCode.workgroup_fbarrier_count, /*Size=*/4);
+ OS.emitIntValue(workgroup_group_segment_byte_size, /*Size=*/4);
+ OS.emitIntValue(gds_segment_byte_size, /*Size=*/4);
+ OS.emitIntValue(kernarg_segment_byte_size, /*Size=*/8);
+ OS.emitIntValue(workgroup_fbarrier_count, /*Size=*/4);
if (wavefront_sgpr_count != nullptr)
OS.emitValue(wavefront_sgpr_count, /*Size=*/2);
else
- OS.emitIntValue(KernelCode.wavefront_sgpr_count, /*Size=*/2);
+ OS.emitIntValue(0, /*Size=*/2);
if (workitem_vgpr_count != nullptr)
OS.emitValue(workitem_vgpr_count, /*Size=*/2);
else
- OS.emitIntValue(KernelCode.workitem_vgpr_count, /*Size=*/2);
+ OS.emitIntValue(0, /*Size=*/2);
- OS.emitIntValue(KernelCode.reserved_vgpr_first, /*Size=*/2);
- OS.emitIntValue(KernelCode.reserved_vgpr_count, /*Size=*/2);
- OS.emitIntValue(KernelCode.reserved_sgpr_first, /*Size=*/2);
- OS.emitIntValue(KernelCode.reserved_sgpr_count, /*Size=*/2);
- OS.emitIntValue(KernelCode.debug_wavefront_private_segment_offset_sgpr,
+ OS.emitIntValue(reserved_vgpr_first, /*Size=*/2);
+ OS.emitIntValue(reserved_vgpr_count, /*Size=*/2);
+ OS.emitIntValue(reserved_sgpr_first, /*Size=*/2);
+ OS.emitIntValue(reserved_sgpr_count, /*Size=*/2);
+ OS.emitIntValue(debug_wavefront_private_segment_offset_sgpr,
/*Size=*/2);
- OS.emitIntValue(KernelCode.debug_private_segment_buffer_sgpr, /*Size=*/2);
- OS.emitIntValue(KernelCode.kernarg_segment_alignment, /*Size=*/1);
- OS.emitIntValue(KernelCode.group_segment_alignment, /*Size=*/1);
- OS.emitIntValue(KernelCode.private_segment_alignment, /*Size=*/1);
- OS.emitIntValue(KernelCode.wavefront_size, /*Size=*/1);
-
- OS.emitIntValue(KernelCode.call_convention, /*Size=*/4);
- OS.emitBytes(StringRef((const char *)KernelCode.reserved3, /*Size=*/12));
- OS.emitIntValue(KernelCode.runtime_loader_kernel_symbol, /*Size=*/8);
- OS.emitBytes(
- StringRef((const char *)KernelCode.control_directives, /*Size=*/16 * 8));
+ OS.emitIntValue(debug_private_segment_buffer_sgpr, /*Size=*/2);
+ OS.emitIntValue(kernarg_segment_alignment, /*Size=*/1);
+ OS.emitIntValue(group_segment_alignment, /*Size=*/1);
+ OS.emitIntValue(private_segment_alignment, /*Size=*/1);
+ OS.emitIntValue(wavefront_size, /*Size=*/1);
+
+ OS.emitIntValue(call_convention, /*Size=*/4);
+ OS.emitBytes(StringRef((const char *)reserved3, /*Size=*/12));
+ OS.emitIntValue(runtime_loader_kernel_symbol, /*Size=*/8);
+ OS.emitBytes(StringRef((const char *)control_directives, /*Size=*/16 * 8));
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
similarity index 57%
rename from llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h
rename to llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
index f1f61f130f944..5577d300fd0b9 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
@@ -1,4 +1,4 @@
-//===--- AMDGPUMCKernelCodeT.h --------------------------------*- C++ -*---===//
+//===- AMDKernelCodeTUtils.h - helpers for amd_kernel_code_t ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
//
-/// \file
-/// MC layer struct for amd_kernel_code_t, provides MCExpr functionality where
+/// \file AMDKernelCodeTUtils.h
+/// MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where
/// required.
///
//
@@ -32,7 +32,37 @@ namespace AMDGPU {
struct AMDGPUMCKernelCodeT {
AMDGPUMCKernelCodeT() = default;
- amd_kernel_code_t KernelCode;
+ uint32_t amd_kernel_code_version_major;
+ uint32_t amd_kernel_code_version_minor;
+ uint16_t amd_machine_kind;
+ uint16_t amd_machine_version_major;
+ uint16_t amd_machine_version_minor;
+ uint16_t amd_machine_version_stepping;
+ int64_t kernel_code_entry_byte_offset;
+ int64_t kernel_code_prefetch_byte_offset;
+ uint64_t kernel_code_prefetch_byte_size;
+ uint64_t reserved0;
+ uint64_t compute_pgm_resource_registers;
+ uint32_t code_properties;
+ uint32_t workgroup_group_segment_byte_size;
+ uint32_t gds_segment_byte_size;
+ uint64_t kernarg_segment_byte_size;
+ uint32_t workgroup_fbarrier_count;
+ uint16_t reserved_vgpr_first;
+ uint16_t reserved_vgpr_count;
+ uint16_t reserved_sgpr_first;
+ uint16_t reserved_sgpr_count;
+ uint16_t debug_wavefront_private_segment_offset_sgpr;
+ uint16_t debug_private_segment_buffer_sgpr;
+ uint8_t kernarg_segment_alignment;
+ uint8_t group_segment_alignment;
+ uint8_t private_segment_alignment;
+ uint8_t wavefront_size;
+ int32_t call_convention;
+ uint8_t reserved3[12];
+ uint64_t runtime_loader_kernel_symbol;
+ uint64_t control_directives[16];
+
const MCExpr *compute_pgm_resource1_registers = nullptr;
const MCExpr *compute_pgm_resource2_registers = nullptr;
diff --git a/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt b/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt
index 814d584c149f0..2f4ce8eaf1d60 100644
--- a/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt
@@ -3,6 +3,7 @@ add_llvm_component_library(LLVMAMDGPUUtils
AMDGPUBaseInfo.cpp
AMDGPUMemoryUtils.cpp
AMDGPUPALMetadata.cpp
+ AMDKernelCodeTUtils.cpp
LINK_COMPONENTS
Analysis
@@ -10,6 +11,7 @@ add_llvm_component_library(LLVMAMDGPUUtils
CodeGenTypes
Core
MC
+ MCParser
Support
TargetParser
More information about the llvm-commits mailing list