[llvm] MCExpr-ify amd_kernel_code_t (PR #91587)
Janek van Oirschot via llvm-commits
llvm-commits at lists.llvm.org
Thu May 9 09:34:16 PDT 2024
https://github.com/JanekvO updated https://github.com/llvm/llvm-project/pull/91587
>From f2ea08b52bf33305571ddf13402672645397d81b Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Thu, 9 May 2024 13:33:47 +0100
Subject: [PATCH 1/3] MCExpr-ify amd_kernel_code_t
---
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 56 +-
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h | 5 +-
.../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 54 +-
.../MCTargetDesc/AMDGPUMCKernelCodeT.cpp | 549 ++++++++++++++++++
.../AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h | 59 ++
.../MCTargetDesc/AMDGPUTargetStreamer.cpp | 13 +-
.../MCTargetDesc/AMDGPUTargetStreamer.h | 9 +-
.../Target/AMDGPU/MCTargetDesc/CMakeLists.txt | 2 +
llvm/lib/Target/AMDGPU/SIDefines.h | 2 +-
.../Target/AMDGPU/Utils/AMDKernelCodeTInfo.h | 24 +-
llvm/test/MC/AMDGPU/amd_kernel_code_t.s | 171 ++++++
11 files changed, 858 insertions(+), 86 deletions(-)
create mode 100644 llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
create mode 100644 llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h
create mode 100644 llvm/test/MC/AMDGPU/amd_kernel_code_t.s
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index de81904143b7..8343d3d83d22 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -19,10 +19,10 @@
#include "AMDGPU.h"
#include "AMDGPUHSAMetadataStreamer.h"
#include "AMDGPUResourceUsageAnalysis.h"
-#include "AMDKernelCodeT.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
+#include "MCTargetDesc/AMDGPUMCKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "R600AsmPrinter.h"
@@ -205,8 +205,9 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
if (STM.isMesaKernel(F) &&
(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
- amd_kernel_code_t KernelCode;
+ MCKernelCodeT KernelCode;
getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);
+ KernelCode.validate(&STM, MF->getContext());
getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
}
@@ -1320,7 +1321,7 @@ static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
}
}
-void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
+void AMDGPUAsmPrinter::getAmdKernelCode(MCKernelCodeT &Out,
const SIProgramInfo &CurrentProgramInfo,
const MachineFunction &MF) const {
const Function &F = MF.getFunction();
@@ -1331,59 +1332,62 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
MCContext &Ctx = MF.getContext();
- AMDGPU::initDefaultAMDKernelCodeT(Out, &STM);
+ AMDGPU::initDefaultAMDKernelCodeT(Out.KernelCode, &STM);
- Out.compute_pgm_resource_registers =
- CurrentProgramInfo.getComputePGMRSrc1(STM) |
- (CurrentProgramInfo.getComputePGMRSrc2() << 32);
- Out.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
+ Out.compute_pgm_resource1_registers =
+ CurrentProgramInfo.getComputePGMRSrc1(STM, Ctx);
+ Out.compute_pgm_resource2_registers =
+ CurrentProgramInfo.getComputePGMRSrc2(Ctx);
+ Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
- if (getMCExprValue(CurrentProgramInfo.DynamicCallStack, Ctx))
- Out.code_properties |= AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK;
+ {
+ const MCExpr *Shift = MCConstantExpr::create(AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT, Ctx);
+ Out.is_dynamic_callstack = MCBinaryExpr::createShl(
+ CurrentProgramInfo.DynamicCallStack, Shift, Ctx);
+ }
- AMD_HSA_BITS_SET(Out.code_properties,
+ AMD_HSA_BITS_SET(Out.KernelCode.code_properties,
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
getElementByteSizeValue(STM.getMaxPrivateElementSize(true)));
const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI->getUserSGPRInfo();
if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
- Out.code_properties |=
+ Out.KernelCode.code_properties |=
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
}
if (UserSGPRInfo.hasDispatchPtr())
- Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+ Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
if (UserSGPRInfo.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5)
- Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
+ Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
if (UserSGPRInfo.hasKernargSegmentPtr())
- Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
+ Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
if (UserSGPRInfo.hasDispatchID())
- Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
+ Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
if (UserSGPRInfo.hasFlatScratchInit())
- Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
+ Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
if (UserSGPRInfo.hasDispatchPtr())
- Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+ Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
if (STM.isXNACKEnabled())
- Out.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
+ Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
Align MaxKernArgAlign;
- Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
- Out.wavefront_sgpr_count = getMCExprValue(CurrentProgramInfo.NumSGPR, Ctx);
- Out.workitem_vgpr_count = getMCExprValue(CurrentProgramInfo.NumVGPR, Ctx);
- Out.workitem_private_segment_byte_size =
- getMCExprValue(CurrentProgramInfo.ScratchSize, Ctx);
- Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
+ Out.KernelCode.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
+ Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
+ Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
+ Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
+ Out.KernelCode.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
// kernarg_segment_alignment is specified as log of the alignment.
// The minimum alignment is 16.
// FIXME: The metadata treats the minimum as 4?
- Out.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign));
+ Out.KernelCode.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign));
}
bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 16d8952a533e..c5abbd3c8c08 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -17,8 +17,6 @@
#include "SIProgramInfo.h"
#include "llvm/CodeGen/AsmPrinter.h"
-struct amd_kernel_code_t;
-
namespace llvm {
class AMDGPUMachineFunction;
@@ -29,6 +27,7 @@ class MCOperand;
namespace AMDGPU {
struct MCKernelDescriptor;
+struct MCKernelCodeT;
namespace HSAMD {
class MetadataStreamer;
}
@@ -50,7 +49,7 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
uint64_t getFunctionCodeSize(const MachineFunction &MF) const;
void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF);
- void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo,
+ void getAmdKernelCode(AMDGPU::MCKernelCodeT &Out, const SIProgramInfo &KernelInfo,
const MachineFunction &MF) const;
/// Emit register usage information so that the GPU driver
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index d47a5f8ebb81..b8bdf816a993 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -8,6 +8,7 @@
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
+#include "MCTargetDesc/AMDGPUMCKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
@@ -1340,7 +1341,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool ParseDirectiveAMDGCNTarget();
bool ParseDirectiveAMDHSACodeObjectVersion();
bool ParseDirectiveAMDHSAKernel();
- bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
+ bool ParseAMDKernelCodeTValue(StringRef ID, MCKernelCodeT &Header);
bool ParseDirectiveAMDKernelCodeT();
// TODO: Possibly make subtargetHasRegister const.
bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
@@ -5872,8 +5873,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
return false;
}
-bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
- amd_kernel_code_t &Header) {
+bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, MCKernelCodeT &C) {
// max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
// assembly for backwards compatibility.
if (ID == "max_scratch_backing_memory_byte_size") {
@@ -5883,25 +5883,14 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
SmallString<40> ErrStr;
raw_svector_ostream Err(ErrStr);
- if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
+ if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
return TokError(Err.str());
}
Lex();
- if (ID == "enable_dx10_clamp") {
- if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) &&
- isGFX12Plus())
- return TokError("enable_dx10_clamp=1 is not allowed on GFX12+");
- }
-
- if (ID == "enable_ieee_mode") {
- if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) &&
- isGFX12Plus())
- return TokError("enable_ieee_mode=1 is not allowed on GFX12+");
- }
-
if (ID == "enable_wavefront_size32") {
- if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
+ if (C.KernelCode.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
if (!isGFX10Plus())
return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
@@ -5913,41 +5902,23 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
}
if (ID == "wavefront_size") {
- if (Header.wavefront_size == 5) {
+ if (C.KernelCode.wavefront_size == 5) {
if (!isGFX10Plus())
return TokError("wavefront_size=5 is only allowed on GFX10+");
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
return TokError("wavefront_size=5 requires +WavefrontSize32");
- } else if (Header.wavefront_size == 6) {
+ } else if (C.KernelCode.wavefront_size == 6) {
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
return TokError("wavefront_size=6 requires +WavefrontSize64");
}
}
- if (ID == "enable_wgp_mode") {
- if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
- !isGFX10Plus())
- return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
- }
-
- if (ID == "enable_mem_ordered") {
- if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
- !isGFX10Plus())
- return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
- }
-
- if (ID == "enable_fwd_progress") {
- if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
- !isGFX10Plus())
- return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
- }
-
return false;
}
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
- amd_kernel_code_t Header;
- AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
+ MCKernelCodeT KernelCode;
+ KernelCode.initDefault(&getSTI(), getContext());
while (true) {
// Lex EndOfStatement. This is in a while loop, because lexing a comment
@@ -5961,11 +5932,12 @@ bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
if (ID == ".end_amd_kernel_code_t")
break;
- if (ParseAMDKernelCodeTValue(ID, Header))
+ if (ParseAMDKernelCodeTValue(ID, KernelCode))
return true;
}
- getTargetStreamer().EmitAMDKernelCodeT(Header);
+ KernelCode.validate(&getSTI(), getContext());
+ getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
new file mode 100644
index 000000000000..7c081d98dadb
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
@@ -0,0 +1,549 @@
+//===--- AMDGPUMCKernelCodeT.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCKernelCodeT.h"
+#include "AMDKernelCodeT.h"
+#include "SIDefines.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::AMDGPU;
+
+// GEN_HAS_MEMBER(member) generates two helper classes for one field name:
+//
+// - HasMember<member>
+// Compile-time check (RESULT) for whether MCKernelCodeT itself declares a
+// data member called `member`. AmbiguousDerived inherits from both
+// MCKernelCodeT and KnownWithMember, and KnownWithMember always declares
+// `member`. If MCKernelCodeT declares it too, `U::member` is ambiguous, the
+// Test(decltype(U::member) *) overload drops out of overload resolution and
+// the variadic Test(...) overload (returning std::true_type) is selected.
+// Otherwise the first overload wins and RESULT is false. This is how the
+// table-driven code tells which amd_kernel_code_t fields are duplicated as
+// MCExpr members; the names must match exactly for the lookup to work.
+//
+// - GetMember<member>
+// Accessor returning a `const MCExpr *&`. When RESULT is true it returns a
+// reference to C.member; when RESULT is false it returns a reference to the
+// static `Phony` pointer (initialized to nullptr) so that both cases can
+// hand back a reference. Note that Phony is shared, writable state: anything
+// stored through it is visible to every later caller of the same accessor.
+#define GEN_HAS_MEMBER(member) \
+ class HasMember##member { \
+ private: \
+ struct KnownWithMember { \
+ int member; \
+ }; \
+ class AmbiguousDerived : public MCKernelCodeT, public KnownWithMember {}; \
+ template <typename U> \
+ static constexpr std::false_type Test(decltype(U::member) *); \
+ template <typename U> static constexpr std::true_type Test(...); \
+ \
+ public: \
+ static constexpr bool RESULT = \
+ std::is_same_v<decltype(Test<AmbiguousDerived>(nullptr)), \
+ std::true_type>; \
+ }; \
+ class GetMember##member { \
+ public: \
+ static const MCExpr *Phony; \
+ template <typename U, typename std::enable_if_t<HasMember##member::RESULT, \
+ U> * = nullptr> \
+ static const MCExpr *&Get(U &C) { \
+ assert(HasMember##member::RESULT && \
+ "Trying to retrieve member that does not exist."); \
+ return C.member; \
+ } \
+ template <typename U, typename std::enable_if_t< \
+ !HasMember##member::RESULT, U> * = nullptr> \
+ static const MCExpr *&Get(U &C) { \
+ return Phony; \
+ } \
+ }; \
+ const MCExpr *GetMember##member::Phony = nullptr;
+
+// The HasMember/GetMember classes cannot be generated with the table-driven
+// approach (see the RECORD table in AMDKernelCodeTInfo.h), so every field name
+// is listed by hand below. The tables further down in this file reference
+// HasMember<name> and GetMember<name> for each RECORD entry, so a name that is
+// missing here (or later added to the table without being added here) shows up
+// as a compile error where those tables are built.
+//
+// Only names that MCKernelCodeT declares as members yield RESULT == true; in
+// the accompanying header those are is_dynamic_callstack,
+// wavefront_sgpr_count, workitem_vgpr_count and
+// workitem_private_segment_byte_size. Every other name resolves to the Phony
+// accessor.
+GEN_HAS_MEMBER(amd_code_version_major)
+GEN_HAS_MEMBER(amd_code_version_minor)
+GEN_HAS_MEMBER(amd_machine_kind)
+GEN_HAS_MEMBER(amd_machine_version_major)
+GEN_HAS_MEMBER(amd_machine_version_minor)
+GEN_HAS_MEMBER(amd_machine_version_stepping)
+
+GEN_HAS_MEMBER(kernel_code_entry_byte_offset)
+GEN_HAS_MEMBER(kernel_code_prefetch_byte_size)
+
+GEN_HAS_MEMBER(granulated_workitem_vgpr_count)
+GEN_HAS_MEMBER(granulated_wavefront_sgpr_count)
+GEN_HAS_MEMBER(priority)
+GEN_HAS_MEMBER(float_mode)
+GEN_HAS_MEMBER(priv)
+GEN_HAS_MEMBER(enable_dx10_clamp)
+GEN_HAS_MEMBER(debug_mode)
+GEN_HAS_MEMBER(enable_ieee_mode)
+GEN_HAS_MEMBER(enable_wgp_mode)
+GEN_HAS_MEMBER(enable_mem_ordered)
+GEN_HAS_MEMBER(enable_fwd_progress)
+
+GEN_HAS_MEMBER(enable_sgpr_private_segment_wave_byte_offset)
+GEN_HAS_MEMBER(user_sgpr_count)
+GEN_HAS_MEMBER(enable_trap_handler)
+GEN_HAS_MEMBER(enable_sgpr_workgroup_id_x)
+GEN_HAS_MEMBER(enable_sgpr_workgroup_id_y)
+GEN_HAS_MEMBER(enable_sgpr_workgroup_id_z)
+GEN_HAS_MEMBER(enable_sgpr_workgroup_info)
+GEN_HAS_MEMBER(enable_vgpr_workitem_id)
+GEN_HAS_MEMBER(enable_exception_msb)
+GEN_HAS_MEMBER(granulated_lds_size)
+GEN_HAS_MEMBER(enable_exception)
+
+GEN_HAS_MEMBER(enable_sgpr_private_segment_buffer)
+GEN_HAS_MEMBER(enable_sgpr_dispatch_ptr)
+GEN_HAS_MEMBER(enable_sgpr_queue_ptr)
+GEN_HAS_MEMBER(enable_sgpr_kernarg_segment_ptr)
+GEN_HAS_MEMBER(enable_sgpr_dispatch_id)
+GEN_HAS_MEMBER(enable_sgpr_flat_scratch_init)
+GEN_HAS_MEMBER(enable_sgpr_private_segment_size)
+GEN_HAS_MEMBER(enable_sgpr_grid_workgroup_count_x)
+GEN_HAS_MEMBER(enable_sgpr_grid_workgroup_count_y)
+GEN_HAS_MEMBER(enable_sgpr_grid_workgroup_count_z)
+GEN_HAS_MEMBER(enable_wavefront_size32)
+GEN_HAS_MEMBER(enable_ordered_append_gds)
+GEN_HAS_MEMBER(private_element_size)
+GEN_HAS_MEMBER(is_ptr64)
+GEN_HAS_MEMBER(is_dynamic_callstack)
+GEN_HAS_MEMBER(is_debug_enabled)
+GEN_HAS_MEMBER(is_xnack_enabled)
+
+GEN_HAS_MEMBER(workitem_private_segment_byte_size)
+GEN_HAS_MEMBER(workgroup_group_segment_byte_size)
+GEN_HAS_MEMBER(gds_segment_byte_size)
+GEN_HAS_MEMBER(kernarg_segment_byte_size)
+GEN_HAS_MEMBER(workgroup_fbarrier_count)
+GEN_HAS_MEMBER(wavefront_sgpr_count)
+GEN_HAS_MEMBER(workitem_vgpr_count)
+GEN_HAS_MEMBER(reserved_vgpr_first)
+GEN_HAS_MEMBER(reserved_vgpr_count)
+GEN_HAS_MEMBER(reserved_sgpr_first)
+GEN_HAS_MEMBER(reserved_sgpr_count)
+GEN_HAS_MEMBER(debug_wavefront_private_segment_offset_sgpr)
+GEN_HAS_MEMBER(debug_private_segment_buffer_sgpr)
+GEN_HAS_MEMBER(kernarg_segment_alignment)
+GEN_HAS_MEMBER(group_segment_alignment)
+GEN_HAS_MEMBER(private_segment_alignment)
+GEN_HAS_MEMBER(wavefront_size)
+GEN_HAS_MEMBER(call_convention)
+GEN_HAS_MEMBER(runtime_loader_kernel_symbol)
+
+// Returns the primary field names: the stringified `name` column of every
+// RECORD entry in AMDKernelCodeTInfo.h. Slot 0 holds an empty "not found"
+// placeholder, so the name of field index I lives at slot I + 1.
+static ArrayRef<StringRef> get_amd_kernel_code_t_FldNames() {
+ static StringRef const Table[] = {
+ "", // not found placeholder
+#define RECORD(name, altName, print, parse) #name
+#include "Utils/AMDKernelCodeTInfo.h"
+#undef RECORD
+ };
+ return ArrayRef(Table);
+}
+
+// Returns the alternative field names: the stringified `altName` column of
+// every RECORD entry in AMDKernelCodeTInfo.h. Uses the same layout as the
+// primary name table, including the empty placeholder in slot 0.
+static ArrayRef<StringRef> get_amd_kernel_code_t_FldAltNames() {
+ static StringRef const Table[] = {
+ "", // not found placeholder
+#define RECORD(name, altName, print, parse) #altName
+#include "Utils/AMDKernelCodeTInfo.h"
+#undef RECORD
+ };
+ return ArrayRef(Table);
+}
+
+// Returns one flag per RECORD entry in AMDKernelCodeTInfo.h: true when
+// MCKernelCodeT declares a member with the same name as the record (i.e. the
+// field is tracked as an MCExpr). Unlike the name tables there is no
+// placeholder entry, so this table is indexed directly by field index.
+static ArrayRef<bool> hasMCExprVersionTable() {
+ static bool const Table[] = {
+#define RECORD(name, altName, print, parse) (HasMember##name::RESULT)
+#include "Utils/AMDKernelCodeTInfo.h"
+#undef RECORD
+ };
+ return ArrayRef(Table);
+}
+
+// Builds the per-field accessor table for \p C: one `const MCExpr *&` per
+// RECORD entry in AMDKernelCodeTInfo.h, indexed by field index. Entries whose
+// field is tracked as an MCExpr refer to the matching member of \p C; all
+// other entries refer to the corresponding GetMember<name>::Phony pointer.
+//
+// The table is rebuilt on every call and returned by value. It must not be a
+// function-local `static`: such a table would be initialized only once, with
+// references into whichever MCKernelCodeT was passed first, and every later
+// call made with a different object would read and write through references
+// to that first (possibly already destroyed) object.
+static SmallVector<std::reference_wrapper<const MCExpr *>, 0>
+getMCExprIndexTable(MCKernelCodeT &C) {
+  return {
+#define RECORD(name, altName, print, parse) GetMember##name::Get(C)
+#include "Utils/AMDKernelCodeTInfo.h"
+#undef RECORD
+  };
+}
+
+// Builds the name -> table slot lookup. Both the primary and the alternative
+// spelling of a field map to the same slot. insert() keeps the first mapping
+// for a key, so an earlier entry wins if a name occurs more than once.
+static StringMap<int> createIndexMap(const ArrayRef<StringRef> &names,
+                                     const ArrayRef<StringRef> &altNames) {
+  assert(names.size() == altNames.size());
+  StringMap<int> SlotByName;
+  unsigned Slot = 0;
+  for (const StringRef &Primary : names) {
+    SlotByName.insert(std::pair(Primary, Slot));
+    SlotByName.insert(std::pair(altNames[Slot], Slot));
+    ++Slot;
+  }
+  return SlotByName;
+}
+
+// Translates a field name (primary or alternative spelling) into its field
+// index, or -1 when the name is unknown.
+static int get_amd_kernel_code_t_FieldIndex(StringRef name) {
+  static const StringMap<int> SlotByName = createIndexMap(
+      get_amd_kernel_code_t_FldNames(), get_amd_kernel_code_t_FldAltNames());
+  // lookup() yields 0 for a missing key, which is also the slot of the empty
+  // placeholder entry; subtracting one turns both into -1.
+  const int Slot = SlotByName.lookup(name);
+  return Slot - 1;
+}
+
+// Derives the position and width mask of a bit-field from its complement
+// mask.
+//
+// \p Value is the complement mask: every bit set except the bits occupied by
+// the field. Returns {Shift, Mask}, where Shift is the index of the field's
+// lowest bit and Mask is the field's bit mask moved down to bit 0.
+//
+// A complement with every bit set describes an empty field. That case returns
+// {0, 0} instead of searching forever for a lowest set bit that does not
+// exist.
+static constexpr std::pair<unsigned, unsigned> getShiftMask(unsigned Value) {
+  unsigned Mask = ~Value;
+  if (Mask == 0)
+    return {0u, 0u};
+
+  unsigned Shift = 0;
+  while ((Mask & 1u) == 0) {
+    ++Shift;
+    Mask >>= 1;
+  }
+  return {Shift, Mask};
+}
+
+// Wraps \p Val in the expression `(Val & Mask) << Shift`, i.e. moves a field
+// value into its position inside a register. A zero \p Mask or a zero
+// \p Shift leaves out the corresponding operation altogether.
+static const MCExpr *MaskShiftSet(const MCExpr *Val, uint32_t Mask,
+                                  uint32_t Shift, MCContext &Ctx) {
+  const MCExpr *Placed = Val;
+  if (Mask != 0)
+    Placed = MCBinaryExpr::createAnd(Placed, MCConstantExpr::create(Mask, Ctx),
+                                     Ctx);
+  if (Shift != 0)
+    Placed = MCBinaryExpr::createShl(Placed, MCConstantExpr::create(Shift, Ctx),
+                                     Ctx);
+  return Placed;
+}
+
+// Wraps \p Val in the expression `(Val >> Shift) & Mask` (logical shift),
+// i.e. extracts a field value from a register. A zero \p Shift or a zero
+// \p Mask leaves out the corresponding operation altogether.
+static const MCExpr *MaskShiftGet(const MCExpr *Val, uint32_t Mask,
+                                  uint32_t Shift, MCContext &Ctx) {
+  const MCExpr *Extracted = Val;
+  if (Shift != 0)
+    Extracted = MCBinaryExpr::createLShr(
+        Extracted, MCConstantExpr::create(Shift, Ctx), Ctx);
+  if (Mask != 0)
+    Extracted = MCBinaryExpr::createAnd(
+        Extracted, MCConstantExpr::create(Mask, Ctx), Ctx);
+  return Extracted;
+}
+
+// Prints `Name = value` for a plain amd_kernel_code_t member selected by
+// \p ptr. \p Ctx is unused; it only exists so the function matches PrintFx.
+// NOTE(review): the value is converted to int before printing, so members
+// wider than int are truncated in the textual output.
+template <typename T, T amd_kernel_code_t::*ptr>
+static void printField(StringRef Name, const MCKernelCodeT &C, raw_ostream &OS,
+                       MCContext &Ctx) {
+  (void)Ctx;
+  const int Shown = static_cast<int>(C.KernelCode.*ptr);
+  OS << Name << " = " << Shown;
+}
+
+// Prints `Name = value` for a bit-field of \p width bits starting at bit
+// \p shift inside the amd_kernel_code_t member selected by \p ptr. \p Ctx is
+// unused; it only exists so the function matches PrintFx.
+template <typename T, T amd_kernel_code_t::*ptr, int shift, int width = 1>
+static void printBitField(StringRef Name, const MCKernelCodeT &C,
+                          raw_ostream &OS, MCContext &Ctx) {
+  (void)Ctx;
+  const auto FieldMask = (static_cast<T>(1) << width) - 1;
+  const auto FieldBits = (C.KernelCode.*ptr >> shift) & FieldMask;
+  OS << Name << " = " << static_cast<int>(FieldBits);
+}
+
+// Signature shared by all field printers: (field name, kernel code, output
+// stream, MC context).
+using PrintFx = void (*)(StringRef, const MCKernelCodeT &, raw_ostream &,
+ MCContext &);
+
+// Returns one printer per RECORD entry in AMDKernelCodeTInfo.h (the `print`
+// column), indexed by field index.
+//
+// The macros defined inside the initializer configure how the included table
+// expands:
+// - COMPPGM1 / COMPPGM2 forward to COMPPGM with the C_/S_ accessor macros of
+// register 00B848 / 00B84C. COMPPGM itself is not defined in this file;
+// presumably AMDKernelCodeTInfo.h provides it -- verify there.
+// - PRINTCOMP(Complement, PGMType) produces a lambda that derives shift and
+// mask from the complement mask, extracts the field from
+// compute_pgm_resource1_registers when PGMType is 0 and from
+// compute_pgm_resource2_registers otherwise, and prints it as a number if
+// the expression folds to an absolute value, or as an expression if not.
+// NOTE(review): COMPPGM1, COMPPGM2 and PRINTCOMP are not #undef'ed in this
+// function, and getParserTable() defines COMPPGM1/COMPPGM2 again with
+// different bodies -- confirm that the included header undefines them.
+static ArrayRef<PrintFx> getPrinterTable() {
+ static const PrintFx Table[] = {
+#define COMPPGM1(name, aname, AccMacro) \
+ COMPPGM(name, aname, C_00B848_##AccMacro, S_00B848_##AccMacro, 0)
+#define COMPPGM2(name, aname, AccMacro) \
+ COMPPGM(name, aname, C_00B84C_##AccMacro, S_00B84C_##AccMacro, 32)
+#define PRINTCOMP(Complement, PGMType) \
+ [](StringRef Name, const MCKernelCodeT &C, raw_ostream &OS, \
+ MCContext &Ctx) { \
+ OS << Name << " = "; \
+ auto [Shift, Mask] = getShiftMask(Complement); \
+ const MCExpr *Value; \
+ if (PGMType == 0) { \
+ Value = \
+ MaskShiftGet(C.compute_pgm_resource1_registers, Mask, Shift, Ctx); \
+ } else { \
+ Value = \
+ MaskShiftGet(C.compute_pgm_resource2_registers, Mask, Shift, Ctx); \
+ } \
+ int64_t Val; \
+ if (Value->evaluateAsAbsolute(Val)) \
+ OS << Val; \
+ else \
+ Value->print(OS, Ctx.getAsmInfo()); \
+ }
+#define RECORD(name, altName, print, parse) print
+#include "Utils/AMDKernelCodeTInfo.h"
+#undef RECORD
+ };
+ return ArrayRef(Table);
+}
+
+// Consumes `= <absolute expression>` from the token stream and stores the
+// evaluated result in \p Value. Returns true on success; on failure a
+// diagnostic is written to \p Err and false is returned.
+static bool expectAbsExpression(MCAsmParser &MCParser, int64_t &Value,
+                                raw_ostream &Err) {
+  auto &Lexer = MCParser.getLexer();
+  if (Lexer.isNot(AsmToken::Equal)) {
+    Err << "expected '='";
+    return false;
+  }
+  Lexer.Lex();
+
+  // parseAbsoluteExpression() reports failure by returning true.
+  const bool Failed = MCParser.parseAbsoluteExpression(Value);
+  if (Failed)
+    Err << "integer absolute expression expected";
+  return !Failed;
+}
+
+// Parses `= <absolute expression>` and stores the result, converted to T, in
+// the amd_kernel_code_t member selected by \p ptr. Returns false (leaving the
+// member untouched) when parsing fails.
+template <typename T, T amd_kernel_code_t::*ptr>
+static bool parseField(MCKernelCodeT &C, MCAsmParser &MCParser,
+                       raw_ostream &Err) {
+  int64_t Parsed = 0;
+  const bool Ok = expectAbsExpression(MCParser, Parsed, Err);
+  if (Ok)
+    C.KernelCode.*ptr = (T)Parsed;
+  return Ok;
+}
+
+// Parses `= <absolute expression>` and writes the result into the bit-field
+// of \p width bits starting at bit \p shift inside the amd_kernel_code_t
+// member selected by \p ptr. Bits outside the field keep their value, and
+// value bits that do not fit the field are dropped. Returns false (leaving
+// the member untouched) when parsing fails.
+template <typename T, T amd_kernel_code_t::*ptr, int shift, int width = 1>
+static bool parseBitField(MCKernelCodeT &C, MCAsmParser &MCParser,
+                          raw_ostream &Err) {
+  int64_t Parsed = 0;
+  if (!expectAbsExpression(MCParser, Parsed, Err))
+    return false;
+
+  const uint64_t FieldMask = ((UINT64_C(1) << width) - 1) << shift;
+  T &Storage = C.KernelCode.*ptr;
+  Storage &= (T)~FieldMask;                      // clear the old field bits
+  Storage |= (T)((Parsed << shift) & FieldMask); // insert the new ones
+  return true;
+}
+
+// Consumes `= <expression>` from the token stream and stores the parsed
+// expression in \p Value without requiring it to be absolute. Returns true
+// on success; on failure a diagnostic is written to \p Err and false is
+// returned.
+static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value,
+                      raw_ostream &Err) {
+  auto &Lexer = MCParser.getLexer();
+  if (Lexer.isNot(AsmToken::Equal)) {
+    Err << "expected '='";
+    return false;
+  }
+  Lexer.Lex();
+
+  // parseExpression() reports failure by returning true.
+  const bool Failed = MCParser.parseExpression(Value);
+  if (Failed)
+    Err << "Could not parse expression";
+  return !Failed;
+}
+
+// Signature shared by all field parsers: (kernel code to update, parser,
+// error stream). A parser returns true on success.
+using ParseFx = bool (*)(MCKernelCodeT &, MCAsmParser &, raw_ostream &);
+
+// Returns one parser per RECORD entry in AMDKernelCodeTInfo.h (the `parse`
+// column), indexed by field index.
+//
+// The macros defined inside the initializer configure how the included table
+// expands:
+// - COMPPGM1 / COMPPGM2 forward to COMPPGM with the G_/C_ accessor macros of
+// register 00B848 / 00B84C. COMPPGM itself is not defined in this file;
+// presumably AMDKernelCodeTInfo.h provides it -- verify there.
+// - PARSECOMP(Complement, PGMType) produces a lambda that parses an
+// arbitrary expression, masks and shifts it into field position, clears
+// the field in the target register expression by AND-ing with the
+// complement mask and then ORs the new value in. The target is
+// compute_pgm_resource1_registers when PGMType is 0 and
+// compute_pgm_resource2_registers otherwise. The register expression is
+// used as an operand without a null check, so it has to be set (e.g. by
+// initDefault()) before any of these parsers runs.
+static ArrayRef<ParseFx> getParserTable() {
+ static const ParseFx Table[] = {
+#define COMPPGM1(name, aname, AccMacro) \
+ COMPPGM(name, aname, G_00B848_##AccMacro, C_00B848_##AccMacro, 0)
+#define COMPPGM2(name, aname, AccMacro) \
+ COMPPGM(name, aname, G_00B84C_##AccMacro, C_00B84C_##AccMacro, 32)
+#define PARSECOMP(Complement, PGMType) \
+ [](MCKernelCodeT &C, MCAsmParser &MCParser, raw_ostream &Err) -> bool { \
+ MCContext &Ctx = MCParser.getContext(); \
+ const MCExpr *Value; \
+ if (!parseExpr(MCParser, Value, Err)) \
+ return false; \
+ auto [Shift, Mask] = getShiftMask(Complement); \
+ Value = MaskShiftSet(Value, Mask, Shift, Ctx); \
+ const MCExpr *Compl = MCConstantExpr::create(Complement, Ctx); \
+ if (PGMType == 0) { \
+ C.compute_pgm_resource1_registers = MCBinaryExpr::createAnd( \
+ C.compute_pgm_resource1_registers, Compl, Ctx); \
+ C.compute_pgm_resource1_registers = MCBinaryExpr::createOr( \
+ C.compute_pgm_resource1_registers, Value, Ctx); \
+ } else { \
+ C.compute_pgm_resource2_registers = MCBinaryExpr::createAnd( \
+ C.compute_pgm_resource2_registers, Compl, Ctx); \
+ C.compute_pgm_resource2_registers = MCBinaryExpr::createOr( \
+ C.compute_pgm_resource2_registers, Value, Ctx); \
+ } \
+ return true; \
+ }
+#define RECORD(name, altName, print, parse) parse
+#include "Utils/AMDKernelCodeTInfo.h"
+#undef RECORD
+ };
+ return ArrayRef(Table);
+}
+
+// Prints the field with index \p FldIndex through its table-provided printer.
+// Fields whose printer slot is null produce no output.
+static void printAmdKernelCodeField(const MCKernelCodeT &C, int FldIndex,
+                                    raw_ostream &OS, MCContext &Ctx) {
+  const PrintFx Emit = getPrinterTable()[FldIndex];
+  if (!Emit)
+    return;
+  // The name table has a placeholder in slot 0, hence the + 1.
+  const StringRef FieldName = get_amd_kernel_code_t_FldNames()[FldIndex + 1];
+  Emit(FieldName, C, OS, Ctx);
+}
+
+// Resets the object to the subtarget defaults: KernelCode is filled in by
+// initDefaultAMDKernelCodeT(), the two resource register expressions become
+// constants holding the low and high 32 bits of the packed
+// compute_pgm_resource_registers default, and the remaining MCExpr fields
+// become the constant 0.
+void MCKernelCodeT::initDefault(const MCSubtargetInfo *STI, MCContext &Ctx) {
+  AMDGPU::initDefaultAMDKernelCodeT(KernelCode, STI);
+
+  const MCExpr *Zero = MCConstantExpr::create(0, Ctx);
+
+  const auto PackedRsrc = KernelCode.compute_pgm_resource_registers;
+  compute_pgm_resource1_registers =
+      MCConstantExpr::create(PackedRsrc & 0xFFFFFFFF, Ctx);
+  compute_pgm_resource2_registers =
+      MCConstantExpr::create((PackedRsrc >> 32) & 0xffffffff, Ctx);
+
+  is_dynamic_callstack = wavefront_sgpr_count = workitem_vgpr_count =
+      workitem_private_segment_byte_size = Zero;
+}
+
+// Checks the compute_pgm_resource1 bits against what the subtarget supports
+// and reports the first violation through \p Ctx (without a source location).
+// Nothing is checked when the register expression does not fold to an
+// absolute value.
+void MCKernelCodeT::validate(const MCSubtargetInfo *STI, MCContext &Ctx) {
+  int64_t Rsrc1 = 0;
+  if (!compute_pgm_resource1_registers->evaluateAsAbsolute(Rsrc1))
+    return;
+
+  // Listed in the order in which violations are reported. The subtarget is
+  // only consulted for entries whose bit is actually set.
+  const struct {
+    bool Violated;
+    const char *Message;
+  } Checks[] = {
+      {G_00B848_DX10_CLAMP(Rsrc1) && AMDGPU::isGFX12Plus(*STI),
+       "enable_dx10_clamp=1 is not allowed on GFX12+"},
+      {G_00B848_IEEE_MODE(Rsrc1) && AMDGPU::isGFX12Plus(*STI),
+       "enable_ieee_mode=1 is not allowed on GFX12+"},
+      {G_00B848_WGP_MODE(Rsrc1) && !AMDGPU::isGFX10Plus(*STI),
+       "enable_wgp_mode=1 is only allowed on GFX10+"},
+      {G_00B848_MEM_ORDERED(Rsrc1) && !AMDGPU::isGFX10Plus(*STI),
+       "enable_mem_ordered=1 is only allowed on GFX10+"},
+      {G_00B848_FWD_PROGRESS(Rsrc1) && !AMDGPU::isGFX10Plus(*STI),
+       "enable_fwd_progress=1 is only allowed on GFX10+"},
+  };
+
+  for (const auto &Check : Checks) {
+    if (!Check.Violated)
+      continue;
+    Ctx.reportError({}, Check.Message);
+    return;
+  }
+}
+
+// Returns a writable reference to the expression slot of the field with the
+// given field index. \p Index is not range-checked here.
+const MCExpr *&MCKernelCodeT::getMCExprForIndex(int Index) {
+  return getMCExprIndexTable(*this)[Index].get();
+}
+
+// Parses the value of the field named \p ID. Fields tracked as MCExpr accept
+// an arbitrary expression that is stored as is; all other fields are handed
+// to their table-provided parser. Returns true on success; an unknown field
+// name or a parse failure writes a diagnostic to \p Err and returns false,
+// and a field without a parser returns false as well.
+bool MCKernelCodeT::ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser,
+                                     raw_ostream &Err) {
+  const int FieldIdx = get_amd_kernel_code_t_FieldIndex(ID);
+  if (FieldIdx < 0) {
+    Err << "unexpected amd_kernel_code_t field name " << ID;
+    return false;
+  }
+
+  if (!hasMCExprVersionTable()[FieldIdx]) {
+    const ParseFx Handler = getParserTable()[FieldIdx];
+    return Handler && Handler(*this, MCParser, Err);
+  }
+
+  const MCExpr *Parsed;
+  const bool Ok = parseExpr(MCParser, Parsed, Err);
+  if (Ok)
+    getMCExprForIndex(FieldIdx) = Parsed;
+  return Ok;
+}
+
+// Writes the textual form of all fields to \p OS, one line per table entry,
+// each line starting with \p tab. Fields tracked as MCExpr are printed as a
+// number when the expression folds to an absolute value and as an expression
+// otherwise; every other field goes through its table-provided printer.
+void MCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, const char *tab,
+                                    MCContext &Ctx) {
+  const int NumFields = hasMCExprVersionTable().size();
+  for (int Field = 0; Field != NumFields; ++Field) {
+    OS << tab;
+    if (!hasMCExprVersionTable()[Field]) {
+      printAmdKernelCodeField(*this, Field, OS, Ctx);
+    } else {
+      // The name table has a placeholder in slot 0, hence the + 1.
+      OS << get_amd_kernel_code_t_FldNames()[Field + 1] << " = ";
+      const MCExpr *Expr = getMCExprForIndex(Field);
+      int64_t Folded;
+      if (Expr->evaluateAsAbsolute(Folded))
+        OS << Folded;
+      else
+        Expr->print(OS, Ctx.getAsmInfo());
+    }
+    OS << '\n';
+  }
+}
+
+// Emits the binary form of the kernel code object to \p OS, field by field
+// with explicit sizes. The order of the statements below is the layout of the
+// emitted data, so it must not be rearranged.
+//
+// Fields that have an MCExpr counterpart are emitted from the expression when
+// it is set and from the plain KernelCode value when the expression pointer
+// is null.
+void MCKernelCodeT::EmitKernelCodeT(MCStreamer &OS, MCContext &Ctx) {
+ OS.emitIntValue(KernelCode.amd_kernel_code_version_major, /*Size=*/4);
+ OS.emitIntValue(KernelCode.amd_kernel_code_version_minor, /*Size=*/4);
+ OS.emitIntValue(KernelCode.amd_machine_kind, /*Size=*/2);
+ OS.emitIntValue(KernelCode.amd_machine_version_major, /*Size=*/2);
+ OS.emitIntValue(KernelCode.amd_machine_version_minor, /*Size=*/2);
+ OS.emitIntValue(KernelCode.amd_machine_version_stepping, /*Size=*/2);
+ OS.emitIntValue(KernelCode.kernel_code_entry_byte_offset, /*Size=*/8);
+ OS.emitIntValue(KernelCode.kernel_code_prefetch_byte_offset, /*Size=*/8);
+ OS.emitIntValue(KernelCode.kernel_code_prefetch_byte_size, /*Size=*/8);
+ OS.emitIntValue(KernelCode.reserved0, /*Size=*/8);
+
+ // The packed 64-bit compute_pgm_resource_registers value is emitted as two
+ // 32-bit halves: resource1 (low half) first, then resource2 (high half).
+ if (compute_pgm_resource1_registers != nullptr)
+ OS.emitValue(compute_pgm_resource1_registers, /*Size=*/4);
+ else
+ OS.emitIntValue(KernelCode.compute_pgm_resource_registers & 0xFFFFFFFF,
+ /*Size=*/4);
+
+ if (compute_pgm_resource2_registers != nullptr)
+ OS.emitValue(compute_pgm_resource2_registers, /*Size=*/4);
+ else
+ OS.emitIntValue((KernelCode.compute_pgm_resource_registers >> 32) &
+ 0xFFFFFFFF,
+ /*Size=*/4);
+
+ // code_properties: the constant bits come from KernelCode, and the
+ // is_dynamic_callstack expression is masked to the field width, shifted
+ // into position and OR-ed in.
+ // NOTE(review): AMDGPUAsmPrinter::getAmdKernelCode in this same patch
+ // already stores is_dynamic_callstack as
+ // `DynamicCallStack << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT`.
+ // Masking that value with the width mask and shifting it again here looks
+ // like a double shift -- confirm which side is supposed to place the bit.
+ if (is_dynamic_callstack != nullptr) {
+ const MCExpr *CodeProps =
+ MCConstantExpr::create(KernelCode.code_properties, Ctx);
+ CodeProps = MCBinaryExpr::createOr(
+ CodeProps,
+ MaskShiftSet(is_dynamic_callstack,
+ (1 << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH) - 1,
+ AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT, Ctx),
+ Ctx);
+ OS.emitValue(CodeProps, /*Size=*/4);
+ } else
+ OS.emitIntValue(KernelCode.code_properties, /*Size=*/4);
+
+ if (workitem_private_segment_byte_size != nullptr)
+ OS.emitValue(workitem_private_segment_byte_size, /*Size=*/4);
+ else
+ OS.emitIntValue(KernelCode.workitem_private_segment_byte_size, /*Size=*/4);
+
+ OS.emitIntValue(KernelCode.workgroup_group_segment_byte_size, /*Size=*/4);
+ OS.emitIntValue(KernelCode.gds_segment_byte_size, /*Size=*/4);
+ OS.emitIntValue(KernelCode.kernarg_segment_byte_size, /*Size=*/8);
+ OS.emitIntValue(KernelCode.workgroup_fbarrier_count, /*Size=*/4);
+
+ if (wavefront_sgpr_count != nullptr)
+ OS.emitValue(wavefront_sgpr_count, /*Size=*/2);
+ else
+ OS.emitIntValue(KernelCode.wavefront_sgpr_count, /*Size=*/2);
+
+ if (workitem_vgpr_count != nullptr)
+ OS.emitValue(workitem_vgpr_count, /*Size=*/2);
+ else
+ OS.emitIntValue(KernelCode.workitem_vgpr_count, /*Size=*/2);
+
+ OS.emitIntValue(KernelCode.reserved_vgpr_first, /*Size=*/2);
+ OS.emitIntValue(KernelCode.reserved_vgpr_count, /*Size=*/2);
+ OS.emitIntValue(KernelCode.reserved_sgpr_first, /*Size=*/2);
+ OS.emitIntValue(KernelCode.reserved_sgpr_count, /*Size=*/2);
+ OS.emitIntValue(KernelCode.debug_wavefront_private_segment_offset_sgpr,
+ /*Size=*/2);
+ OS.emitIntValue(KernelCode.debug_private_segment_buffer_sgpr, /*Size=*/2);
+ OS.emitIntValue(KernelCode.kernarg_segment_alignment, /*Size=*/1);
+ OS.emitIntValue(KernelCode.group_segment_alignment, /*Size=*/1);
+ OS.emitIntValue(KernelCode.private_segment_alignment, /*Size=*/1);
+ OS.emitIntValue(KernelCode.wavefront_size, /*Size=*/1);
+
+ OS.emitIntValue(KernelCode.call_convention, /*Size=*/4);
+ // The reserved and control-directive arrays are emitted as raw bytes.
+ OS.emitBytes(StringRef((const char *)KernelCode.reserved3, /*Size=*/12));
+ OS.emitIntValue(KernelCode.runtime_loader_kernel_symbol, /*Size=*/8);
+ OS.emitBytes(
+ StringRef((const char *)KernelCode.control_directives, /*Size=*/16 * 8));
+}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h
new file mode 100644
index 000000000000..66c5d1107487
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h
@@ -0,0 +1,59 @@
+//===--- AMDGPUMCKernelCodeT.h --------------------------------*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// MC layer struct for amd_kernel_code_t, provides MCExpr functionality where
+/// required.
+///
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCKERNELCODET_H
+#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCKERNELCODET_H
+
+#include "AMDKernelCodeT.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/ArrayRef.h"
+
+namespace llvm {
+class MCAsmParser;
+class MCContext;
+class MCExpr;
+class MCStreamer;
+class MCSubtargetInfo;
+class raw_ostream;
+namespace AMDGPU {
+
+struct MCKernelCodeT {
+ MCKernelCodeT() = default;
+
+ amd_kernel_code_t KernelCode;
+ const MCExpr *compute_pgm_resource1_registers = nullptr;
+ const MCExpr *compute_pgm_resource2_registers = nullptr;
+
+ // Duplicates of amd_kernel_code_t fields, held as MCExpr instead.
+ // Names have to match the ones used in AMDKernelCodeTInfo.h.
+ const MCExpr *is_dynamic_callstack = nullptr;
+ const MCExpr *wavefront_sgpr_count = nullptr;
+ const MCExpr *workitem_vgpr_count = nullptr;
+ const MCExpr *workitem_private_segment_byte_size = nullptr;
+
+ void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx);
+ void validate(const MCSubtargetInfo *STI, MCContext &Ctx);
+
+ const MCExpr *&getMCExprForIndex(int Index);
+
+ bool ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser, raw_ostream &Err);
+ void EmitKernelCodeT(raw_ostream &OS, const char *tab, MCContext &Ctx);
+ void EmitKernelCodeT(MCStreamer &OS, MCContext &Ctx);
+};
+
+} // end namespace AMDGPU
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCKERNELCODET_H
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 02fe7be06280..e58409e8b0cb 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -11,9 +11,9 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUTargetStreamer.h"
+#include "AMDGPUMCKernelCodeT.h"
#include "AMDGPUMCKernelDescriptor.h"
#include "AMDGPUPTNote.h"
-#include "AMDKernelCodeT.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
@@ -240,10 +240,9 @@ void AMDGPUTargetAsmStreamer::EmitDirectiveAMDHSACodeObjectVersion(
OS << "\t.amdhsa_code_object_version " << COV << '\n';
}
-void
-AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
+void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(MCKernelCodeT &Header) {
OS << "\t.amd_kernel_code_t\n";
- dumpAmdKernelCode(&Header, OS, "\t\t");
+ Header.EmitKernelCodeT(OS, "\t\t", getContext());
OS << "\t.end_amd_kernel_code_t\n";
}
@@ -789,12 +788,10 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {}
-void
-AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
-
+void AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(MCKernelCodeT &Header) {
MCStreamer &OS = getStreamer();
OS.pushSection();
- OS.emitBytes(StringRef((const char*)&Header, sizeof(Header)));
+ Header.EmitKernelCodeT(OS, getContext());
OS.popSection();
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index 706897a5dc1f..ea5d1d379f78 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -13,8 +13,6 @@
#include "Utils/AMDGPUPALMetadata.h"
#include "llvm/MC/MCStreamer.h"
-struct amd_kernel_code_t;
-
namespace llvm {
class MCELFStreamer;
@@ -23,6 +21,7 @@ class formatted_raw_ostream;
namespace AMDGPU {
+struct MCKernelCodeT;
struct MCKernelDescriptor;
namespace HSAMD {
struct Metadata;
@@ -54,7 +53,7 @@ class AMDGPUTargetStreamer : public MCTargetStreamer {
CodeObjectVersion = COV;
}
- virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header){};
+ virtual void EmitAMDKernelCodeT(AMDGPU::MCKernelCodeT &Header){};
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type){};
@@ -130,7 +129,7 @@ class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer {
void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV) override;
- void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
+ void EmitAMDKernelCodeT(AMDGPU::MCKernelCodeT &Header) override;
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
@@ -186,7 +185,7 @@ class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
void EmitDirectiveAMDGCNTarget() override;
- void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
+ void EmitAMDKernelCodeT(AMDGPU::MCKernelCodeT &Header) override;
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
index 14a02b6d8e36..5ff44ee70afa 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
@@ -8,6 +8,7 @@ add_llvm_component_library(LLVMAMDGPUDesc
AMDGPUMCExpr.cpp
AMDGPUMCTargetDesc.cpp
AMDGPUTargetStreamer.cpp
+ AMDGPUMCKernelCodeT.cpp
AMDGPUMCKernelDescriptor.cpp
R600InstPrinter.cpp
R600MCCodeEmitter.cpp
@@ -20,6 +21,7 @@ add_llvm_component_library(LLVMAMDGPUDesc
CodeGenTypes
Core
MC
+ MCParser
Support
TargetParser
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 6d0e0b3f4de2..1e9bfc77ab92 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -1111,7 +1111,7 @@ enum Type { TRAP = -2, WORKGROUP = -1 };
#define C_00B84C_LDS_SIZE 0xFF007FFF
#define S_00B84C_EXCP_EN(x) (((x) & 0x7F) << 24)
#define G_00B84C_EXCP_EN(x) (((x) >> 24) & 0x7F)
-#define C_00B84C_EXCP_EN
+#define C_00B84C_EXCP_EN 0x80FFFFFF
#define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC
#define R_0286D0_SPI_PS_INPUT_ADDR 0x0286D0
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
index 95ad3f35d18f..2a9fa804bc89 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
@@ -15,31 +15,44 @@
#define QNAME(name) amd_kernel_code_t::name
#define FLD_T(name) decltype(QNAME(name)), &QNAME(name)
+#ifndef FIELD2
#define FIELD2(sname, aname, name) \
RECORD(sname, aname, printField<FLD_T(name)>, parseField<FLD_T(name)>)
+#endif
+#ifndef FIELD
#define FIELD(name) FIELD2(name, name, name)
+#endif
-
+#ifndef PRINTCODEPROP
#define PRINTCODEPROP(name) \
printBitField<FLD_T(code_properties),\
AMD_CODE_PROPERTY_##name##_SHIFT,\
AMD_CODE_PROPERTY_##name##_WIDTH>
+#endif
+#ifndef PARSECODEPROP
#define PARSECODEPROP(name) \
parseBitField<FLD_T(code_properties),\
AMD_CODE_PROPERTY_##name##_SHIFT,\
AMD_CODE_PROPERTY_##name##_WIDTH>
+#endif
+#ifndef CODEPROP
#define CODEPROP(name, shift) \
RECORD(name, name, PRINTCODEPROP(shift), PARSECODEPROP(shift))
+#endif
// have to define these lambdas because of Set/GetMacro
+#ifndef PRINTCOMP
#define PRINTCOMP(GetMacro, Shift) \
[](StringRef Name, const amd_kernel_code_t &C, raw_ostream &OS) { \
printName(OS, Name) << \
(int)GetMacro(C.compute_pgm_resource_registers >> Shift); \
}
+#endif
+
+#ifndef PARSECOMP
#define PARSECOMP(SetMacro, Shift) \
[](amd_kernel_code_t &C, MCAsmParser &MCParser, raw_ostream &Err) { \
int64_t Value = 0; \
@@ -49,15 +62,22 @@
C.compute_pgm_resource_registers |= SetMacro(Value) << Shift; \
return true; \
}
+#endif
+#ifndef COMPPGM
#define COMPPGM(name, aname, GetMacro, SetMacro, Shift) \
RECORD(name, aname, PRINTCOMP(GetMacro, Shift), PARSECOMP(SetMacro, Shift))
+#endif
+#ifndef COMPPGM1
#define COMPPGM1(name, aname, AccMacro) \
COMPPGM(name, aname, G_00B848_##AccMacro, S_00B848_##AccMacro, 0)
+#endif
+#ifndef COMPPGM2
#define COMPPGM2(name, aname, AccMacro) \
COMPPGM(name, aname, G_00B84C_##AccMacro, S_00B84C_##AccMacro, 32)
+#endif
///////////////////////////////////////////////////////////////////////////////
// Begin of the table
@@ -149,7 +169,7 @@ FIELD(runtime_loader_kernel_symbol)
#undef PARSECODEPROP
#undef CODEPROP
#undef PRINTCOMP
-#undef PAPSECOMP
+#undef PARSECOMP
#undef COMPPGM
#undef COMPPGM1
#undef COMPPGM2
diff --git a/llvm/test/MC/AMDGPU/amd_kernel_code_t.s b/llvm/test/MC/AMDGPU/amd_kernel_code_t.s
new file mode 100644
index 000000000000..052ec0bfabb8
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/amd_kernel_code_t.s
@@ -0,0 +1,171 @@
+; RUN: llvm-mc -triple=amdgcn-mesa-mesa3d -mcpu=gfx900 -filetype=asm < %s | FileCheck --check-prefix=ASM %s
+; RUN: llvm-mc -triple=amdgcn-mesa-mesa3d -mcpu=gfx900 -filetype=obj < %s > %t
+; RUN: llvm-objdump -s %t | FileCheck --check-prefix=OBJDUMP %s
+
+; OBJDUMP: Contents of section .known_is_dynamic_callstack:
+; OBJDUMP: 0030 00000000 00000000 00001000 00000000
+
+; OBJDUMP: Contents of section .known_wavefront_sgpr_count:
+; OBJDUMP: 0050 00000000 01000000 00000000 00000000
+
+; OBJDUMP: Contents of section .known_workitem_vgpr_count:
+; OBJDUMP: 0050 00000000 00000100 00000000 00000000
+
+; OBJDUMP: Contents of section .known_workitem_private_segment_byte_size:
+; OBJDUMP: 0030 00000000 00000000 00000000 01000000
+
+; OBJDUMP: Contents of section .known_granulated_workitem_vgpr_count:
+; OBJDUMP: 0030 01000000 00000000 00000000 00000000
+
+; OBJDUMP: Contents of section .known_enable_sgpr_workgroup_id_x:
+; OBJDUMP: 0030 00000000 80000000 00000000 00000000
+
+; OBJDUMP: Contents of section .unknown_is_dynamic_callstack:
+; OBJDUMP: 0030 00000000 00000000 00001000 00000000
+
+; OBJDUMP: Contents of section .unknown_wavefront_sgpr_count:
+; OBJDUMP: 0050 00000000 01000000 00000000 00000000
+
+; OBJDUMP: Contents of section .unknown_workitem_vgpr_count:
+; OBJDUMP: 0050 00000000 00000100 00000000 00000000
+
+; OBJDUMP: Contents of section .unknown_workitem_private_segment_byte_size:
+; OBJDUMP: 0030 00000000 00000000 00000000 01000000
+
+; OBJDUMP: Contents of section .unknown_granulated_workitem_vgpr_count:
+; OBJDUMP: 0030 01000000 00000000 00000000 00000000
+
+; OBJDUMP: Contents of section .unknown_enable_sgpr_workgroup_id_x:
+; OBJDUMP: 0030 00000000 80000000 00000000 00000000
+
+.set known, 1
+
+; ASM-LABEL: known_is_dynamic_callstack:
+; ASM: is_dynamic_callstack = 1
+.section .known_is_dynamic_callstack
+known_is_dynamic_callstack:
+ .amd_kernel_code_t
+ is_dynamic_callstack = known
+ .end_amd_kernel_code_t
+ s_endpgm
+
+; ASM-LABEL: known_wavefront_sgpr_count:
+; ASM: wavefront_sgpr_count = 1
+.section .known_wavefront_sgpr_count
+known_wavefront_sgpr_count:
+ .amd_kernel_code_t
+ wavefront_sgpr_count = known
+ .end_amd_kernel_code_t
+ s_endpgm
+
+; ASM-LABEL: known_workitem_vgpr_count:
+; ASM: workitem_vgpr_count = 1
+.section .known_workitem_vgpr_count
+known_workitem_vgpr_count:
+ .amd_kernel_code_t
+ workitem_vgpr_count = known
+ .end_amd_kernel_code_t
+ s_endpgm
+
+; ASM-LABEL: known_workitem_private_segment_byte_size:
+; ASM: workitem_private_segment_byte_size = 1
+.section .known_workitem_private_segment_byte_size
+known_workitem_private_segment_byte_size:
+ .amd_kernel_code_t
+ workitem_private_segment_byte_size = known
+ .end_amd_kernel_code_t
+ s_endpgm
+
+; ASM-LABEL: known_granulated_workitem_vgpr_count:
+; ASM: granulated_workitem_vgpr_count = 1
+.section .known_granulated_workitem_vgpr_count
+known_granulated_workitem_vgpr_count:
+ .amd_kernel_code_t
+ granulated_workitem_vgpr_count = known
+ .end_amd_kernel_code_t
+ s_endpgm
+
+; ASM-LABEL: known_enable_sgpr_workgroup_id_x:
+; ASM: enable_sgpr_workgroup_id_x = 1
+.section .known_enable_sgpr_workgroup_id_x
+known_enable_sgpr_workgroup_id_x:
+ .amd_kernel_code_t
+ enable_sgpr_workgroup_id_x = known
+ .end_amd_kernel_code_t
+ s_endpgm
+
+; ASM-LABEL: unknown_is_dynamic_callstack:
+; ASM: is_dynamic_callstack = unknown
+.section .unknown_is_dynamic_callstack
+unknown_is_dynamic_callstack:
+ .amd_kernel_code_t
+ is_dynamic_callstack = unknown
+ .end_amd_kernel_code_t
+ s_endpgm
+
+; ASM-LABEL: unknown_wavefront_sgpr_count:
+; ASM: wavefront_sgpr_count = unknown
+.section .unknown_wavefront_sgpr_count
+unknown_wavefront_sgpr_count:
+ .amd_kernel_code_t
+ wavefront_sgpr_count = unknown
+ .end_amd_kernel_code_t
+ s_endpgm
+
+; ASM-LABEL: unknown_workitem_vgpr_count:
+; ASM: workitem_vgpr_count = unknown
+.section .unknown_workitem_vgpr_count
+unknown_workitem_vgpr_count:
+ .amd_kernel_code_t
+ workitem_vgpr_count = unknown
+ .end_amd_kernel_code_t
+ s_endpgm
+
+; ASM-LABEL: unknown_workitem_private_segment_byte_size:
+; ASM: workitem_private_segment_byte_size = unknown
+.section .unknown_workitem_private_segment_byte_size
+unknown_workitem_private_segment_byte_size:
+ .amd_kernel_code_t
+ workitem_private_segment_byte_size = unknown
+ .end_amd_kernel_code_t
+ s_endpgm
+
+; ASM-LABEL: unknown_granulated_workitem_vgpr_count:
+; ASM: granulated_workitem_vgpr_count = ((0&4294967232)|(unknown&63))&63
+; ASM: granulated_wavefront_sgpr_count = (((0&4294967232)|(unknown&63))>>6)&15
+; ASM: priority = (((0&4294967232)|(unknown&63))>>10)&3
+; ASM: float_mode = (((0&4294967232)|(unknown&63))>>12)&255
+; ASM: priv = (((0&4294967232)|(unknown&63))>>20)&1
+; ASM: enable_dx10_clamp = (((0&4294967232)|(unknown&63))>>21)&1
+; ASM: debug_mode = (((0&4294967232)|(unknown&63))>>22)&1
+; ASM: enable_ieee_mode = (((0&4294967232)|(unknown&63))>>23)&1
+; ASM: enable_wgp_mode = (((0&4294967232)|(unknown&63))>>29)&1
+; ASM: enable_mem_ordered = (((0&4294967232)|(unknown&63))>>30)&1
+; ASM: enable_fwd_progress = (((0&4294967232)|(unknown&63))>>31)&1
+.section .unknown_granulated_workitem_vgpr_count
+unknown_granulated_workitem_vgpr_count:
+ .amd_kernel_code_t
+ granulated_workitem_vgpr_count = unknown
+ .end_amd_kernel_code_t
+ s_endpgm
+
+; ASM-LABEL: unknown_enable_sgpr_workgroup_id_x:
+; ASM: enable_sgpr_private_segment_wave_byte_offset = ((0&4294967167)|((unknown&1)<<7))&1
+; ASM: user_sgpr_count = (((0&4294967167)|((unknown&1)<<7))>>1)&31
+; ASM: enable_trap_handler = (((0&4294967167)|((unknown&1)<<7))>>6)&1
+; ASM: enable_sgpr_workgroup_id_x = (((0&4294967167)|((unknown&1)<<7))>>7)&1
+; ASM: enable_sgpr_workgroup_id_y = (((0&4294967167)|((unknown&1)<<7))>>8)&1
+; ASM: enable_sgpr_workgroup_id_z = (((0&4294967167)|((unknown&1)<<7))>>9)&1
+; ASM: enable_sgpr_workgroup_info = (((0&4294967167)|((unknown&1)<<7))>>10)&1
+; ASM: enable_vgpr_workitem_id = (((0&4294967167)|((unknown&1)<<7))>>11)&3
+; ASM: enable_exception_msb = (((0&4294967167)|((unknown&1)<<7))>>13)&3
+; ASM: granulated_lds_size = (((0&4294967167)|((unknown&1)<<7))>>15)&511
+; ASM: enable_exception = (((0&4294967167)|((unknown&1)<<7))>>24)&127
+.section .unknown_enable_sgpr_workgroup_id_x
+unknown_enable_sgpr_workgroup_id_x:
+ .amd_kernel_code_t
+ enable_sgpr_workgroup_id_x = unknown
+ .end_amd_kernel_code_t
+ s_endpgm
+
+.set unknown, 1
>From ccdeaf31408e00cd9de1f352102a517368bec733 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Thu, 9 May 2024 17:05:15 +0100
Subject: [PATCH 2/3] Formatting, rename struct, minor feedback changes
---
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 27 +++++---
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h | 5 +-
.../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 7 +-
.../MCTargetDesc/AMDGPUMCKernelCodeT.cpp | 65 +++++++++----------
.../AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h | 6 +-
.../MCTargetDesc/AMDGPUTargetStreamer.cpp | 4 +-
.../MCTargetDesc/AMDGPUTargetStreamer.h | 8 +--
7 files changed, 65 insertions(+), 57 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 8343d3d83d22..f4a5bd10d557 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -205,7 +205,7 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
if (STM.isMesaKernel(F) &&
(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
- MCKernelCodeT KernelCode;
+ AMDGPUMCKernelCodeT KernelCode;
getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);
KernelCode.validate(&STM, MF->getContext());
getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
@@ -1321,7 +1321,7 @@ static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
}
}
-void AMDGPUAsmPrinter::getAmdKernelCode(MCKernelCodeT &Out,
+void AMDGPUAsmPrinter::getAmdKernelCode(AMDGPUMCKernelCodeT &Out,
const SIProgramInfo &CurrentProgramInfo,
const MachineFunction &MF) const {
const Function &F = MF.getFunction();
@@ -1341,7 +1341,8 @@ void AMDGPUAsmPrinter::getAmdKernelCode(MCKernelCodeT &Out,
Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
{
- const MCExpr *Shift = MCConstantExpr::create(AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT, Ctx);
+ const MCExpr *Shift = MCConstantExpr::create(
+ AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT, Ctx);
Out.is_dynamic_callstack = MCBinaryExpr::createShl(
CurrentProgramInfo.DynamicCallStack, Shift, Ctx);
}
@@ -1353,32 +1354,37 @@ void AMDGPUAsmPrinter::getAmdKernelCode(MCKernelCodeT &Out,
const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI->getUserSGPRInfo();
if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
Out.KernelCode.code_properties |=
- AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
}
if (UserSGPRInfo.hasDispatchPtr())
- Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+ Out.KernelCode.code_properties |=
+ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
if (UserSGPRInfo.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5)
Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
if (UserSGPRInfo.hasKernargSegmentPtr())
- Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
+ Out.KernelCode.code_properties |=
+ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
if (UserSGPRInfo.hasDispatchID())
Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
if (UserSGPRInfo.hasFlatScratchInit())
- Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
+ Out.KernelCode.code_properties |=
+ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
if (UserSGPRInfo.hasDispatchPtr())
- Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+ Out.KernelCode.code_properties |=
+ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
if (STM.isXNACKEnabled())
Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
Align MaxKernArgAlign;
- Out.KernelCode.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
+ Out.KernelCode.kernarg_segment_byte_size =
+ STM.getKernArgSegmentSize(F, MaxKernArgAlign);
Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
@@ -1387,7 +1393,8 @@ void AMDGPUAsmPrinter::getAmdKernelCode(MCKernelCodeT &Out,
// kernarg_segment_alignment is specified as log of the alignment.
// The minimum alignment is 16.
// FIXME: The metadata treats the minimum as 4?
- Out.KernelCode.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign));
+ Out.KernelCode.kernarg_segment_alignment =
+ Log2(std::max(Align(16), MaxKernArgAlign));
}
bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index c5abbd3c8c08..87156f27fc6c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -27,7 +27,7 @@ class MCOperand;
namespace AMDGPU {
struct MCKernelDescriptor;
-struct MCKernelCodeT;
+struct AMDGPUMCKernelCodeT;
namespace HSAMD {
class MetadataStreamer;
}
@@ -49,7 +49,8 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
uint64_t getFunctionCodeSize(const MachineFunction &MF) const;
void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF);
- void getAmdKernelCode(AMDGPU::MCKernelCodeT &Out, const SIProgramInfo &KernelInfo,
+ void getAmdKernelCode(AMDGPU::AMDGPUMCKernelCodeT &Out,
+ const SIProgramInfo &KernelInfo,
const MachineFunction &MF) const;
/// Emit register usage information so that the GPU driver
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index b8bdf816a993..7de2d52a8337 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1341,7 +1341,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool ParseDirectiveAMDGCNTarget();
bool ParseDirectiveAMDHSACodeObjectVersion();
bool ParseDirectiveAMDHSAKernel();
- bool ParseAMDKernelCodeTValue(StringRef ID, MCKernelCodeT &Header);
+ bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
bool ParseDirectiveAMDKernelCodeT();
// TODO: Possibly make subtargetHasRegister const.
bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
@@ -5873,7 +5873,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
return false;
}
-bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, MCKernelCodeT &C) {
+bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
+ AMDGPUMCKernelCodeT &C) {
// max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
// assembly for backwards compatibility.
if (ID == "max_scratch_backing_memory_byte_size") {
@@ -5917,7 +5918,7 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, MCKernelCodeT &C) {
}
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
- MCKernelCodeT KernelCode;
+ AMDGPUMCKernelCodeT KernelCode;
KernelCode.initDefault(&getSTI(), getContext());
while (true) {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
index 7c081d98dadb..72d3bfb48b94 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
@@ -22,11 +22,11 @@
using namespace llvm;
using namespace llvm::AMDGPU;
-// Generates the following for MCKernelCodeT struct members:
+// Generates the following for AMDGPUMCKernelCodeT struct members:
// - HasMemberXXXXX class
-// A check to see if MCKernelCodeT has a specific member so it can determine
-// which of the original amd_kernel_code_t members are duplicated (if the
-// names don't match, the table driven strategy won't work).
+// A check to see if AMDGPUMCKernelCodeT has a specific member so it can
+// determine which of the original amd_kernel_code_t members are duplicated
+// (if the names don't match, the table driven strategy won't work).
// - GetMemberXXXXX class
// A retrieval helper for said member (of type const MCExpr *&). Will return
// a `Phony` const MCExpr * initialized to nullptr to preserve reference
@@ -37,7 +37,8 @@ using namespace llvm::AMDGPU;
struct KnownWithMember { \
int member; \
}; \
- class AmbiguousDerived : public MCKernelCodeT, public KnownWithMember {}; \
+ class AmbiguousDerived : public AMDGPUMCKernelCodeT, \
+ public KnownWithMember {}; \
template <typename U> \
static constexpr std::false_type Test(decltype(U::member) *); \
template <typename U> static constexpr std::true_type Test(...); \
@@ -143,7 +144,7 @@ GEN_HAS_MEMBER(runtime_loader_kernel_symbol)
static ArrayRef<StringRef> get_amd_kernel_code_t_FldNames() {
static StringRef const Table[] = {
- "", // not found placeholder
+ "", // not found placeholder
#define RECORD(name, altName, print, parse) #name
#include "Utils/AMDKernelCodeTInfo.h"
#undef RECORD
@@ -153,7 +154,7 @@ static ArrayRef<StringRef> get_amd_kernel_code_t_FldNames() {
static ArrayRef<StringRef> get_amd_kernel_code_t_FldAltNames() {
static StringRef const Table[] = {
- "", // not found placeholder
+ "", // not found placeholder
#define RECORD(name, altName, print, parse) #altName
#include "Utils/AMDKernelCodeTInfo.h"
#undef RECORD
@@ -171,7 +172,7 @@ static ArrayRef<bool> hasMCExprVersionTable() {
}
static ArrayRef<std::reference_wrapper<const MCExpr *>>
-getMCExprIndexTable(MCKernelCodeT &C) {
+getMCExprIndexTable(AMDGPUMCKernelCodeT &C) {
static std::reference_wrapper<const MCExpr *> Table[] = {
#define RECORD(name, altName, print, parse) GetMember##name::Get(C)
#include "Utils/AMDKernelCodeTInfo.h"
@@ -235,23 +236,19 @@ static const MCExpr *MaskShiftGet(const MCExpr *Val, uint32_t Mask,
}
template <typename T, T amd_kernel_code_t::*ptr>
-static void printField(StringRef Name, const MCKernelCodeT &C, raw_ostream &OS,
- MCContext &Ctx) {
- (void)Ctx;
- OS << Name << " = ";
- OS << (int)(C.KernelCode.*ptr);
+static void printField(StringRef Name, const AMDGPUMCKernelCodeT &C,
+ raw_ostream &OS, MCContext &) {
+ OS << Name << " = " << (int)(C.KernelCode.*ptr);
}
template <typename T, T amd_kernel_code_t::*ptr, int shift, int width = 1>
-static void printBitField(StringRef Name, const MCKernelCodeT &C,
- raw_ostream &OS, MCContext &Ctx) {
- (void)Ctx;
+static void printBitField(StringRef Name, const AMDGPUMCKernelCodeT &C,
+ raw_ostream &OS, MCContext &) {
const auto Mask = (static_cast<T>(1) << width) - 1;
- OS << Name << " = ";
- OS << (int)((C.KernelCode.*ptr >> shift) & Mask);
+ OS << Name << " = " << (int)((C.KernelCode.*ptr >> shift) & Mask);
}
-using PrintFx = void (*)(StringRef, const MCKernelCodeT &, raw_ostream &,
+using PrintFx = void (*)(StringRef, const AMDGPUMCKernelCodeT &, raw_ostream &,
MCContext &);
static ArrayRef<PrintFx> getPrinterTable() {
@@ -261,7 +258,7 @@ static ArrayRef<PrintFx> getPrinterTable() {
#define COMPPGM2(name, aname, AccMacro) \
COMPPGM(name, aname, C_00B84C_##AccMacro, S_00B84C_##AccMacro, 32)
#define PRINTCOMP(Complement, PGMType) \
- [](StringRef Name, const MCKernelCodeT &C, raw_ostream &OS, \
+ [](StringRef Name, const AMDGPUMCKernelCodeT &C, raw_ostream &OS, \
MCContext &Ctx) { \
OS << Name << " = "; \
auto [Shift, Mask] = getShiftMask(Complement); \
@@ -303,7 +300,7 @@ static bool expectAbsExpression(MCAsmParser &MCParser, int64_t &Value,
}
template <typename T, T amd_kernel_code_t::*ptr>
-static bool parseField(MCKernelCodeT &C, MCAsmParser &MCParser,
+static bool parseField(AMDGPUMCKernelCodeT &C, MCAsmParser &MCParser,
raw_ostream &Err) {
int64_t Value = 0;
if (!expectAbsExpression(MCParser, Value, Err))
@@ -313,7 +310,7 @@ static bool parseField(MCKernelCodeT &C, MCAsmParser &MCParser,
}
template <typename T, T amd_kernel_code_t::*ptr, int shift, int width = 1>
-static bool parseBitField(MCKernelCodeT &C, MCAsmParser &MCParser,
+static bool parseBitField(AMDGPUMCKernelCodeT &C, MCAsmParser &MCParser,
raw_ostream &Err) {
int64_t Value = 0;
if (!expectAbsExpression(MCParser, Value, Err))
@@ -339,7 +336,7 @@ static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value,
return true;
}
-using ParseFx = bool (*)(MCKernelCodeT &, MCAsmParser &, raw_ostream &);
+using ParseFx = bool (*)(AMDGPUMCKernelCodeT &, MCAsmParser &, raw_ostream &);
static ArrayRef<ParseFx> getParserTable() {
static const ParseFx Table[] = {
@@ -348,7 +345,8 @@ static ArrayRef<ParseFx> getParserTable() {
#define COMPPGM2(name, aname, AccMacro) \
COMPPGM(name, aname, G_00B84C_##AccMacro, C_00B84C_##AccMacro, 32)
#define PARSECOMP(Complement, PGMType) \
- [](MCKernelCodeT &C, MCAsmParser &MCParser, raw_ostream &Err) -> bool { \
+ [](AMDGPUMCKernelCodeT &C, MCAsmParser &MCParser, \
+ raw_ostream &Err) -> bool { \
MCContext &Ctx = MCParser.getContext(); \
const MCExpr *Value; \
if (!parseExpr(MCParser, Value, Err)) \
@@ -376,14 +374,15 @@ static ArrayRef<ParseFx> getParserTable() {
return ArrayRef(Table);
}
-static void printAmdKernelCodeField(const MCKernelCodeT &C, int FldIndex,
+static void printAmdKernelCodeField(const AMDGPUMCKernelCodeT &C, int FldIndex,
raw_ostream &OS, MCContext &Ctx) {
auto Printer = getPrinterTable()[FldIndex];
if (Printer)
Printer(get_amd_kernel_code_t_FldNames()[FldIndex + 1], C, OS, Ctx);
}
-void MCKernelCodeT::initDefault(const MCSubtargetInfo *STI, MCContext &Ctx) {
+void AMDGPUMCKernelCodeT::initDefault(const MCSubtargetInfo *STI,
+ MCContext &Ctx) {
AMDGPU::initDefaultAMDKernelCodeT(KernelCode, STI);
const MCExpr *ZeroExpr = MCConstantExpr::create(0, Ctx);
compute_pgm_resource1_registers = MCConstantExpr::create(
@@ -396,7 +395,7 @@ void MCKernelCodeT::initDefault(const MCSubtargetInfo *STI, MCContext &Ctx) {
workitem_private_segment_byte_size = ZeroExpr;
}
-void MCKernelCodeT::validate(const MCSubtargetInfo *STI, MCContext &Ctx) {
+void AMDGPUMCKernelCodeT::validate(const MCSubtargetInfo *STI, MCContext &Ctx) {
int64_t Value;
if (!compute_pgm_resource1_registers->evaluateAsAbsolute(Value))
return;
@@ -427,13 +426,13 @@ void MCKernelCodeT::validate(const MCSubtargetInfo *STI, MCContext &Ctx) {
}
}
-const MCExpr *&MCKernelCodeT::getMCExprForIndex(int Index) {
+const MCExpr *&AMDGPUMCKernelCodeT::getMCExprForIndex(int Index) {
auto IndexTable = getMCExprIndexTable(*this);
return IndexTable[Index].get();
}
-bool MCKernelCodeT::ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser,
- raw_ostream &Err) {
+bool AMDGPUMCKernelCodeT::ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser,
+ raw_ostream &Err) {
const int Idx = get_amd_kernel_code_t_FieldIndex(ID);
if (Idx < 0) {
Err << "unexpected amd_kernel_code_t field name " << ID;
@@ -451,8 +450,8 @@ bool MCKernelCodeT::ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser,
return Parser ? Parser(*this, MCParser, Err) : false;
}
-void MCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, const char *tab,
- MCContext &Ctx) {
+void AMDGPUMCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, const char *tab,
+ MCContext &Ctx) {
const int Size = hasMCExprVersionTable().size();
for (int i = 0; i < Size; ++i) {
OS << tab;
@@ -471,7 +470,7 @@ void MCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, const char *tab,
}
}
-void MCKernelCodeT::EmitKernelCodeT(MCStreamer &OS, MCContext &Ctx) {
+void AMDGPUMCKernelCodeT::EmitKernelCodeT(MCStreamer &OS, MCContext &Ctx) {
OS.emitIntValue(KernelCode.amd_kernel_code_version_major, /*Size=*/4);
OS.emitIntValue(KernelCode.amd_kernel_code_version_minor, /*Size=*/4);
OS.emitIntValue(KernelCode.amd_machine_kind, /*Size=*/2);
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h
index 66c5d1107487..278b0827f07a 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.h
@@ -17,8 +17,8 @@
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCKERNELCODET_H
#include "AMDKernelCodeT.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
namespace llvm {
class MCAsmParser;
@@ -29,8 +29,8 @@ class MCSubtargetInfo;
class raw_ostream;
namespace AMDGPU {
-struct MCKernelCodeT {
- MCKernelCodeT() = default;
+struct AMDGPUMCKernelCodeT {
+ AMDGPUMCKernelCodeT() = default;
amd_kernel_code_t KernelCode;
const MCExpr *compute_pgm_resource1_registers = nullptr;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index e58409e8b0cb..efbbe93ceb3c 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -240,7 +240,7 @@ void AMDGPUTargetAsmStreamer::EmitDirectiveAMDHSACodeObjectVersion(
OS << "\t.amdhsa_code_object_version " << COV << '\n';
}
-void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(MCKernelCodeT &Header) {
+void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
OS << "\t.amd_kernel_code_t\n";
Header.EmitKernelCodeT(OS, "\t\t", getContext());
OS << "\t.end_amd_kernel_code_t\n";
@@ -788,7 +788,7 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {}
-void AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(MCKernelCodeT &Header) {
+void AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
MCStreamer &OS = getStreamer();
OS.pushSection();
Header.EmitKernelCodeT(OS, getContext());
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index ea5d1d379f78..399e0a7dfd09 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -21,7 +21,7 @@ class formatted_raw_ostream;
namespace AMDGPU {
-struct MCKernelCodeT;
+struct AMDGPUMCKernelCodeT;
struct MCKernelDescriptor;
namespace HSAMD {
struct Metadata;
@@ -53,7 +53,7 @@ class AMDGPUTargetStreamer : public MCTargetStreamer {
CodeObjectVersion = COV;
}
- virtual void EmitAMDKernelCodeT(AMDGPU::MCKernelCodeT &Header){};
+ virtual void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header){};
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type){};
@@ -129,7 +129,7 @@ class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer {
void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV) override;
- void EmitAMDKernelCodeT(AMDGPU::MCKernelCodeT &Header) override;
+ void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override;
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
@@ -185,7 +185,7 @@ class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
void EmitDirectiveAMDGCNTarget() override;
- void EmitAMDKernelCodeT(AMDGPU::MCKernelCodeT &Header) override;
+ void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override;
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
>From 7878171c9051d23246feaa8bbfcd083d23ff49f7 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Thu, 9 May 2024 17:33:50 +0100
Subject: [PATCH 3/3] Formatting that gets undone by local clang-format
---
llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index 399e0a7dfd09..e5c90060cb5d 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -53,7 +53,7 @@ class AMDGPUTargetStreamer : public MCTargetStreamer {
CodeObjectVersion = COV;
}
- virtual void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header){};
+ virtual void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) {};
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type){};
More information about the llvm-commits mailing list