[llvm] [AMDGPU] MCExpr-ify MC layer kernel descriptor (PR #80855)
Janek van Oirschot via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 6 07:46:14 PST 2024
https://github.com/JanekvO created https://github.com/llvm/llvm-project/pull/80855
Kernel descriptor attributes, with their respective emit and asm parse functionality, converted to MCExpr. Required for moving function/program resource usage information propagation to MC layer. As a result of this change, some amdhsa directives in assembly can use asm symbols that are defined later than their use.
Furthermore, this change (unintentionally) fixes a latent bug in some uses of the AMDHSA_BITS_SET macro: the VAL argument is not parenthesized in the macro expansion, which can produce unintended C++ statements (e.g., see how the macro expands for the tg-split and wavefrontsize32 attributes in getDefaultAmdhsaKernelDescriptor).
From b3e77d2dcbd35ff442cb24dc907ed88dc42bd023 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Tue, 6 Feb 2024 15:29:26 +0000
Subject: [PATCH] MCExpr-ify MC layer kernel descriptor
---
.../llvm/Support/AMDHSAKernelDescriptor.h | 79 ++--
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 30 +-
.../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 188 +++++----
.../MCTargetDesc/AMDGPUTargetStreamer.cpp | 381 +++++++++++-------
.../MCTargetDesc/AMDHSAKernelDescriptor.cpp | 32 ++
.../Target/AMDGPU/MCTargetDesc/CMakeLists.txt | 1 +
.../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 84 ++--
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 5 +-
llvm/test/MC/AMDGPU/hsa-gfx12-v4.s | 6 +-
llvm/test/MC/AMDGPU/hsa-sym-exprs.s | 68 ++++
llvm/test/MC/AMDGPU/hsa-tg-split.s | 74 ++++
11 files changed, 641 insertions(+), 307 deletions(-)
create mode 100644 llvm/lib/Target/AMDGPU/MCTargetDesc/AMDHSAKernelDescriptor.cpp
create mode 100644 llvm/test/MC/AMDGPU/hsa-sym-exprs.s
create mode 100644 llvm/test/MC/AMDGPU/hsa-tg-split.s
diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
index 84cac3ef700e05..9c5d8fa1c1a607 100644
--- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
+++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
@@ -52,6 +52,10 @@
#endif // AMDHSA_BITS_SET
namespace llvm {
+
+class MCContext;
+class MCExpr;
+
namespace amdhsa {
// Floating point rounding modes. Must match hardware definition.
@@ -238,18 +242,40 @@ enum : int32_t {
// Kernel descriptor. Must be kept backwards compatible.
struct kernel_descriptor_t {
- uint32_t group_segment_fixed_size;
- uint32_t private_segment_fixed_size;
- uint32_t kernarg_size;
+ const MCExpr *group_segment_fixed_size;
+ const MCExpr *private_segment_fixed_size;
+ const MCExpr *kernarg_size;
uint8_t reserved0[4];
int64_t kernel_code_entry_byte_offset;
uint8_t reserved1[20];
- uint32_t compute_pgm_rsrc3; // GFX10+ and GFX90A+
- uint32_t compute_pgm_rsrc1;
- uint32_t compute_pgm_rsrc2;
- uint16_t kernel_code_properties;
- uint16_t kernarg_preload;
+ const MCExpr *compute_pgm_rsrc3; // GFX10+ and GFX90A+
+ const MCExpr *compute_pgm_rsrc1;
+ const MCExpr *compute_pgm_rsrc2;
+ const MCExpr *kernel_code_properties;
+ const MCExpr *kernarg_preload;
uint8_t reserved3[4];
+
+ static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift,
+ uint32_t Mask, MCContext &Ctx);
+ static const MCExpr *bits_get(const MCExpr *Src, uint32_t Shift,
+ uint32_t Mask, MCContext &Ctx);
+};
+
+// Sizes for kernel_descriptor_t properties, should add up to 64.
+enum : uint32_t {
+ SIZEOF_GROUP_SEGMENT_FIXED_SIZE = 4,
+ SIZEOF_PRIVATE_SEGMENT_FIXED_SIZE = 4,
+ SIZEOF_KERNARG_SIZE = 4,
+ SIZEOF_RESERVED0 = 4,
+ SIZEOF_KERNEL_CODE_ENTRY_BYTE_OFFSET = 8,
+ SIZEOF_RESERVED1 = 20,
+ SIZEOF_COMPUTE_PGM_RSRC3 = 4,
+ SIZEOF_COMPUTE_PGM_RSRC1 = 4,
+ SIZEOF_COMPUTE_PGM_RSRC2 = 4,
+ SIZEOF_KERNEL_CODE_PROPERTIES = 2,
+ SIZEOF_KERNARG_PRELOAD = 2,
+ SIZEOF_RESERVED3 = 4,
+ SIZEOF_KERNEL_DESCRIPTOR = 64
};
enum : uint32_t {
@@ -267,43 +293,6 @@ enum : uint32_t {
RESERVED3_OFFSET = 60
};
-static_assert(
- sizeof(kernel_descriptor_t) == 64,
- "invalid size for kernel_descriptor_t");
-static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) ==
- GROUP_SEGMENT_FIXED_SIZE_OFFSET,
- "invalid offset for group_segment_fixed_size");
-static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) ==
- PRIVATE_SEGMENT_FIXED_SIZE_OFFSET,
- "invalid offset for private_segment_fixed_size");
-static_assert(offsetof(kernel_descriptor_t, kernarg_size) ==
- KERNARG_SIZE_OFFSET,
- "invalid offset for kernarg_size");
-static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET,
- "invalid offset for reserved0");
-static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) ==
- KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET,
- "invalid offset for kernel_code_entry_byte_offset");
-static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET,
- "invalid offset for reserved1");
-static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) ==
- COMPUTE_PGM_RSRC3_OFFSET,
- "invalid offset for compute_pgm_rsrc3");
-static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) ==
- COMPUTE_PGM_RSRC1_OFFSET,
- "invalid offset for compute_pgm_rsrc1");
-static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) ==
- COMPUTE_PGM_RSRC2_OFFSET,
- "invalid offset for compute_pgm_rsrc2");
-static_assert(offsetof(kernel_descriptor_t, kernel_code_properties) ==
- KERNEL_CODE_PROPERTIES_OFFSET,
- "invalid offset for kernel_code_properties");
-static_assert(offsetof(kernel_descriptor_t, kernarg_preload) ==
- KERNARG_PRELOAD_OFFSET,
- "invalid offset for kernarg_preload");
-static_assert(offsetof(kernel_descriptor_t, reserved3) == RESERVED3_OFFSET,
- "invalid offset for reserved3");
-
} // end namespace amdhsa
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index db81e1ee9e3899..d68c7e499f62c5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -434,24 +434,30 @@ amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
assert(isUInt<32>(PI.getComputePGMRSrc1(STM)));
assert(isUInt<32>(PI.getComputePGMRSrc2()));
- KernelDescriptor.group_segment_fixed_size = PI.LDSSize;
- KernelDescriptor.private_segment_fixed_size = PI.ScratchSize;
+ KernelDescriptor.group_segment_fixed_size =
+ MCConstantExpr::create(PI.LDSSize, MF.getContext());
+ KernelDescriptor.private_segment_fixed_size =
+ MCConstantExpr::create(PI.ScratchSize, MF.getContext());
Align MaxKernArgAlign;
- KernelDescriptor.kernarg_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
+ KernelDescriptor.kernarg_size = MCConstantExpr::create(
+ STM.getKernArgSegmentSize(F, MaxKernArgAlign), MF.getContext());
- KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1(STM);
- KernelDescriptor.compute_pgm_rsrc2 = PI.getComputePGMRSrc2();
- KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);
+ KernelDescriptor.compute_pgm_rsrc1 =
+ MCConstantExpr::create(PI.getComputePGMRSrc1(STM), MF.getContext());
+ KernelDescriptor.compute_pgm_rsrc2 =
+ MCConstantExpr::create(PI.getComputePGMRSrc2(), MF.getContext());
+ KernelDescriptor.kernel_code_properties = MCConstantExpr::create(
+ getAmdhsaKernelCodeProperties(MF), MF.getContext());
assert(STM.hasGFX90AInsts() || CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0);
- if (STM.hasGFX90AInsts())
- KernelDescriptor.compute_pgm_rsrc3 =
- CurrentProgramInfo.ComputePGMRSrc3GFX90A;
+ KernelDescriptor.compute_pgm_rsrc3 = MCConstantExpr::create(
+ STM.hasGFX90AInsts() ? CurrentProgramInfo.ComputePGMRSrc3GFX90A : 0,
+ MF.getContext());
- if (AMDGPU::hasKernargPreload(STM))
- KernelDescriptor.kernarg_preload =
- static_cast<uint16_t>(Info->getNumKernargPreloadedSGPRs());
+ KernelDescriptor.kernarg_preload = MCConstantExpr::create(
+ AMDGPU::hasKernargPreload(STM) ? Info->getNumKernargPreloadedSGPRs() : 0,
+ MF.getContext());
return KernelDescriptor;
}
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 225e781588668f..2331af628fb730 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5236,7 +5236,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (getParser().parseIdentifier(KernelName))
return true;
- kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
+ kernel_descriptor_t KD =
+ getDefaultAmdhsaKernelDescriptor(&getSTI(), getContext());
StringSet<> Seen;
@@ -5276,89 +5277,107 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
return TokError(".amdhsa_ directives cannot be repeated");
SMLoc ValStart = getLoc();
- int64_t IVal;
- if (getParser().parseAbsoluteExpression(IVal))
+ const MCExpr *ExprVal;
+ if (getParser().parseExpression(ExprVal))
return true;
SMLoc ValEnd = getLoc();
SMRange ValRange = SMRange(ValStart, ValEnd);
- if (IVal < 0)
- return OutOfRangeError(ValRange);
-
+ int64_t IVal = 0;
uint64_t Val = IVal;
+ bool EvaluatableExpr;
+ if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
+ if (IVal < 0)
+ return OutOfRangeError(ValRange);
+ Val = IVal;
+ }
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
- if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
+ if (!isUInt<ENTRY##_WIDTH>(Val)) \
return OutOfRangeError(RANGE); \
- AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
+ kernel_descriptor_t::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
+ getContext());
+
+#define EXPR_SHOULD_RESOLVE() \
+ if (!EvaluatableExpr) \
+ return Error(IDRange.Start, "directive should have resolvable expression", \
+ IDRange);
if (ID == ".amdhsa_group_segment_fixed_size") {
- if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
+ if (!isUInt<SIZEOF_GROUP_SEGMENT_FIXED_SIZE * CHAR_BIT>(Val))
return OutOfRangeError(ValRange);
- KD.group_segment_fixed_size = Val;
+ KD.group_segment_fixed_size = ExprVal;
} else if (ID == ".amdhsa_private_segment_fixed_size") {
- if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
+ if (!isUInt<SIZEOF_PRIVATE_SEGMENT_FIXED_SIZE * CHAR_BIT>(Val))
return OutOfRangeError(ValRange);
- KD.private_segment_fixed_size = Val;
+ KD.private_segment_fixed_size = ExprVal;
} else if (ID == ".amdhsa_kernarg_size") {
- if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
+ if (!isUInt<SIZEOF_KERNARG_SIZE * CHAR_BIT>(Val))
return OutOfRangeError(ValRange);
- KD.kernarg_size = Val;
+ KD.kernarg_size = ExprVal;
} else if (ID == ".amdhsa_user_sgpr_count") {
+ EXPR_SHOULD_RESOLVE();
ExplicitUserSGPRCount = Val;
} else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
+ EXPR_SHOULD_RESOLVE();
if (hasArchitectedFlatScratch())
return Error(IDRange.Start,
"directive is not supported with architected flat scratch",
IDRange);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
- Val, ValRange);
+ ExprVal, ValRange);
if (Val)
ImpliedUserSGPRCount += 4;
} else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
+ EXPR_SHOULD_RESOLVE();
if (!hasKernargPreload())
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
if (Val > getMaxNumUserSGPRs())
return OutOfRangeError(ValRange);
- PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, Val,
+ PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
ValRange);
if (Val) {
ImpliedUserSGPRCount += Val;
PreloadLength = Val;
}
} else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
+ EXPR_SHOULD_RESOLVE();
if (!hasKernargPreload())
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
if (Val >= 1024)
return OutOfRangeError(ValRange);
- PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, Val,
+ PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
ValRange);
if (Val)
PreloadOffset = Val;
} else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
+ EXPR_SHOULD_RESOLVE();
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
+ EXPR_SHOULD_RESOLVE();
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
+ EXPR_SHOULD_RESOLVE();
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
- Val, ValRange);
+ ExprVal, ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
+ EXPR_SHOULD_RESOLVE();
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
@@ -5367,34 +5386,39 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
return Error(IDRange.Start,
"directive is not supported with architected flat scratch",
IDRange);
+ EXPR_SHOULD_RESOLVE();
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
- ValRange);
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
+ ExprVal, ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
+ EXPR_SHOULD_RESOLVE();
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
- Val, ValRange);
+ ExprVal, ValRange);
if (Val)
ImpliedUserSGPRCount += 1;
} else if (ID == ".amdhsa_wavefront_size32") {
+ EXPR_SHOULD_RESOLVE();
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
EnableWavefrontSize32 = Val;
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
- Val, ValRange);
+ KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_uses_dynamic_stack") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
+ KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
if (hasArchitectedFlatScratch())
return Error(IDRange.Start,
"directive is not supported with architected flat scratch",
IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
+ COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_enable_private_segment") {
if (!hasArchitectedFlatScratch())
return Error(
@@ -5402,42 +5426,48 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
"directive is not supported without architected flat scratch",
IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
+ COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
ValRange);
} else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
ValRange);
} else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
ValRange);
} else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
ValRange);
} else if (ID == ".amdhsa_system_vgpr_workitem_id") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
ValRange);
} else if (ID == ".amdhsa_next_free_vgpr") {
+ EXPR_SHOULD_RESOLVE();
VGPRRange = ValRange;
NextFreeVGPR = Val;
} else if (ID == ".amdhsa_next_free_sgpr") {
+ EXPR_SHOULD_RESOLVE();
SGPRRange = ValRange;
NextFreeSGPR = Val;
} else if (ID == ".amdhsa_accum_offset") {
if (!isGFX90A())
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
+ EXPR_SHOULD_RESOLVE();
AccumOffset = Val;
} else if (ID == ".amdhsa_reserve_vcc") {
+ EXPR_SHOULD_RESOLVE();
if (!isUInt<1>(Val))
return OutOfRangeError(ValRange);
ReserveVCC = Val;
} else if (ID == ".amdhsa_reserve_flat_scratch") {
+ EXPR_SHOULD_RESOLVE();
if (IVersion.Major < 7)
return Error(IDRange.Start, "directive requires gfx7+", IDRange);
if (hasArchitectedFlatScratch())
@@ -5457,97 +5487,105 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
IDRange);
} else if (ID == ".amdhsa_float_round_mode_32") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
+ COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_float_round_mode_16_64") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
+ COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_float_denorm_mode_32") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
+ COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_float_denorm_mode_16_64") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
+ COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
ValRange);
} else if (ID == ".amdhsa_dx10_clamp") {
if (IVersion.Major >= 12)
return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Val,
+ COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
ValRange);
} else if (ID == ".amdhsa_ieee_mode") {
if (IVersion.Major >= 12)
return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Val,
+ COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
ValRange);
} else if (ID == ".amdhsa_fp16_overflow") {
if (IVersion.Major < 9)
return Error(IDRange.Start, "directive requires gfx9+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
ValRange);
} else if (ID == ".amdhsa_tg_split") {
if (!isGFX90A())
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
- ValRange);
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
+ ExprVal, ValRange);
} else if (ID == ".amdhsa_workgroup_processor_mode") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
ValRange);
} else if (ID == ".amdhsa_memory_ordered") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
ValRange);
} else if (ID == ".amdhsa_forward_progress") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
ValRange);
} else if (ID == ".amdhsa_shared_vgpr_count") {
+ EXPR_SHOULD_RESOLVE();
if (IVersion.Major < 10 || IVersion.Major >= 12)
return Error(IDRange.Start, "directive requires gfx10 or gfx11",
IDRange);
SharedVGPRCount = Val;
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
- COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, Val,
+ COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
ValRange);
} else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
PARSE_BITS_ENTRY(
KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
- ValRange);
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
+ ExprVal, ValRange);
} else if (ID == ".amdhsa_exception_fp_denorm_src") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
- Val, ValRange);
+ ExprVal, ValRange);
} else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
PARSE_BITS_ENTRY(
KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
- ValRange);
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
+ ExprVal, ValRange);
} else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
- Val, ValRange);
+ ExprVal, ValRange);
} else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
- Val, ValRange);
+ ExprVal, ValRange);
} else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
- Val, ValRange);
+ ExprVal, ValRange);
} else if (ID == ".amdhsa_exception_int_div_zero") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
- Val, ValRange);
+ ExprVal, ValRange);
} else if (ID == ".amdhsa_round_robin_scheduling") {
if (IVersion.Major < 12)
return Error(IDRange.Start, "directive requires gfx12+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, Val,
+ COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
ValRange);
} else {
return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
@@ -5574,15 +5612,18 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
VGPRBlocks))
return OutOfRangeError(VGPRRange);
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
+ kernel_descriptor_t::bits_set(
+ KD.compute_pgm_rsrc1, MCConstantExpr::create(VGPRBlocks, getContext()),
+ COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
+ COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
SGPRBlocks))
return OutOfRangeError(SGPRRange);
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
- SGPRBlocks);
+ kernel_descriptor_t::bits_set(
+ KD.compute_pgm_rsrc1, MCConstantExpr::create(SGPRBlocks, getContext()),
+ COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
+ COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
return TokError("amdgpu_user_sgpr_count smaller than than implied by "
@@ -5593,11 +5634,17 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
return TokError("too many user SGPRs enabled");
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
- UserSGPRCount);
-
- if (PreloadLength && KD.kernarg_size &&
- (PreloadLength * 4 + PreloadOffset * 4 > KD.kernarg_size))
+ kernel_descriptor_t::bits_set(
+ KD.compute_pgm_rsrc2, MCConstantExpr::create(UserSGPRCount, getContext()),
+ COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
+ COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
+
+ int64_t IVal = 0;
+ if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
+ return TokError("Kernarg size should be resolvable");
+ uint64_t kernarg_size = IVal;
+ if (PreloadLength && kernarg_size &&
+ (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
return TokError("Kernarg preload length + offset is larger than the "
"kernarg segment size");
@@ -5609,8 +5656,11 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
"increments of 4");
if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
return TokError("accum_offset exceeds total VGPR allocation");
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
- (AccumOffset / 4 - 1));
+ kernel_descriptor_t::bits_set(
+ KD.compute_pgm_rsrc3,
+ MCConstantExpr::create(AccumOffset / 4 - 1, getContext()),
+ COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
+ COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
}
if (IVersion.Major >= 10 && IVersion.Major < 12) {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 5e9b1674d87dcb..b2d0657e49f072 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -302,91 +302,142 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
bool ReserveVCC, bool ReserveFlatScr) {
IsaVersion IVersion = getIsaVersion(STI.getCPU());
+ const MCAsmInfo *MAI = getContext().getAsmInfo();
OS << "\t.amdhsa_kernel " << KernelName << '\n';
-#define PRINT_FIELD(STREAM, DIRECTIVE, KERNEL_DESC, MEMBER_NAME, FIELD_NAME) \
- STREAM << "\t\t" << DIRECTIVE << " " \
- << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n';
-
- OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size
- << '\n';
- OS << "\t\t.amdhsa_private_segment_fixed_size "
- << KD.private_segment_fixed_size << '\n';
- OS << "\t\t.amdhsa_kernarg_size " << KD.kernarg_size << '\n';
-
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_count", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT);
+ auto print_field = [&](const MCExpr *Expr, uint32_t Shift, uint32_t Mask,
+ StringRef Directive) {
+ int64_t IVal;
+ OS << "\t\t" << Directive << " ";
+ const MCExpr *pgm_rsrc1_bits =
+ amdhsa::kernel_descriptor_t::bits_get(Expr, Shift, Mask, getContext());
+ if (pgm_rsrc1_bits->evaluateAsAbsolute(IVal)) {
+ OS << static_cast<uint64_t>(IVal);
+ } else {
+ pgm_rsrc1_bits->print(OS, MAI);
+ }
+ OS << '\n';
+ };
+
+ OS << "\t\t.amdhsa_group_segment_fixed_size ";
+ KD.group_segment_fixed_size->print(OS, MAI);
+ OS << '\n';
+
+ OS << "\t\t.amdhsa_private_segment_fixed_size ";
+ KD.private_segment_fixed_size->print(OS, MAI);
+ OS << '\n';
+
+ OS << "\t\t.amdhsa_kernarg_size ";
+ KD.kernarg_size->print(OS, MAI);
+ OS << '\n';
+
+ print_field(
+ KD.compute_pgm_rsrc2, amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, ".amdhsa_user_sgpr_count");
if (!hasArchitectedFlatScratch(STI))
- PRINT_FIELD(
- OS, ".amdhsa_user_sgpr_private_segment_buffer", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_queue_ptr", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_id", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
+ print_field(
+ KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
+ ".amdhsa_user_sgpr_private_segment_buffer");
+ print_field(KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR,
+ ".amdhsa_user_sgpr_dispatch_ptr");
+ print_field(KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR,
+ ".amdhsa_user_sgpr_queue_ptr");
+ print_field(
+ KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
+ ".amdhsa_user_sgpr_kernarg_segment_ptr");
+ print_field(KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID,
+ ".amdhsa_user_sgpr_dispatch_id");
if (!hasArchitectedFlatScratch(STI))
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_flat_scratch_init", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
+ print_field(
+ KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
+ ".amdhsa_user_sgpr_flat_scratch_init");
if (hasKernargPreload(STI)) {
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_preload_length ", KD,
- kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH);
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_preload_offset ", KD,
- kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET);
+ print_field(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH_SHIFT,
+ amdhsa::KERNARG_PRELOAD_SPEC_LENGTH,
+ ".amdhsa_user_sgpr_kernarg_preload_length");
+ print_field(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET_SHIFT,
+ amdhsa::KERNARG_PRELOAD_SPEC_OFFSET,
+ ".amdhsa_user_sgpr_kernarg_preload_offset");
}
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
+ print_field(
+ KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
+ ".amdhsa_user_sgpr_private_segment_size");
if (IVersion.Major >= 10)
- PRINT_FIELD(OS, ".amdhsa_wavefront_size32", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
+ print_field(KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
+ ".amdhsa_wavefront_size32");
if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
- PRINT_FIELD(OS, ".amdhsa_uses_dynamic_stack", KD, kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
- PRINT_FIELD(OS,
+ print_field(KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK,
+ ".amdhsa_uses_dynamic_stack");
+ print_field(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT,
(hasArchitectedFlatScratch(STI)
? ".amdhsa_enable_private_segment"
- : ".amdhsa_system_sgpr_private_segment_wavefront_offset"),
- KD, compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
- PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
- PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
- PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
- PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_info", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
- PRINT_FIELD(OS, ".amdhsa_system_vgpr_workitem_id", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
+ : ".amdhsa_system_sgpr_private_segment_wavefront_offset"));
+ print_field(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X,
+ ".amdhsa_system_sgpr_workgroup_id_x");
+ print_field(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y,
+ ".amdhsa_system_sgpr_workgroup_id_y");
+ print_field(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z,
+ ".amdhsa_system_sgpr_workgroup_id_z");
+ print_field(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO,
+ ".amdhsa_system_sgpr_workgroup_info");
+ print_field(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID,
+ ".amdhsa_system_vgpr_workitem_id");
// These directives are required.
OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n';
OS << "\t\t.amdhsa_next_free_sgpr " << NextSGPR << '\n';
- if (AMDGPU::isGFX90A(STI))
- OS << "\t\t.amdhsa_accum_offset " <<
- (AMDHSA_BITS_GET(KD.compute_pgm_rsrc3,
- amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
- << '\n';
+ if (AMDGPU::isGFX90A(STI)) {
+ // MCExpr equivalent of taking the (accum_offset + 1) * 4.
+ const MCExpr *accum_bits = amdhsa::kernel_descriptor_t::bits_get(
+ KD.compute_pgm_rsrc3,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
+ accum_bits = MCBinaryExpr::createAdd(
+ accum_bits, MCConstantExpr::create(1, getContext()), getContext());
+ accum_bits = MCBinaryExpr::createMul(
+ accum_bits, MCConstantExpr::create(4, getContext()), getContext());
+ OS << "\t\t.amdhsa_accum_offset ";
+ int64_t IVal;
+ if (accum_bits->evaluateAsAbsolute(IVal)) {
+ OS << static_cast<uint64_t>(IVal);
+ } else {
+ accum_bits->print(OS, MAI);
+ }
+ OS << '\n';
+ }
if (!ReserveVCC)
OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n';
@@ -403,74 +454,105 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
break;
}
- PRINT_FIELD(OS, ".amdhsa_float_round_mode_32", KD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
- PRINT_FIELD(OS, ".amdhsa_float_round_mode_16_64", KD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
- PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_32", KD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
- PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_16_64", KD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
+ print_field(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32,
+ ".amdhsa_float_round_mode_32");
+ print_field(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64,
+ ".amdhsa_float_round_mode_16_64");
+ print_field(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32,
+ ".amdhsa_float_denorm_mode_32");
+ print_field(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
+ ".amdhsa_float_denorm_mode_16_64");
if (IVersion.Major < 12) {
- PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD, compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
- PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD, compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
+ print_field(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP,
+ ".amdhsa_dx10_clamp");
+ print_field(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE,
+ ".amdhsa_ieee_mode");
+ }
+ if (IVersion.Major >= 9) {
+ print_field(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL,
+ ".amdhsa_fp16_overflow");
}
- if (IVersion.Major >= 9)
- PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
if (AMDGPU::isGFX90A(STI))
- PRINT_FIELD(OS, ".amdhsa_tg_split", KD,
- compute_pgm_rsrc3,
- amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
+ print_field(KD.compute_pgm_rsrc3,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, ".amdhsa_tg_split");
if (IVersion.Major >= 10) {
- PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
- PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
- PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
+ print_field(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
+ ".amdhsa_workgroup_processor_mode");
+ print_field(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED,
+ ".amdhsa_memory_ordered");
+ print_field(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS,
+ ".amdhsa_forward_progress");
}
if (IVersion.Major >= 10 && IVersion.Major < 12) {
- PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3,
- amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
+ print_field(KD.compute_pgm_rsrc3,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT,
+ ".amdhsa_shared_vgpr_count");
+ }
+ if (IVersion.Major >= 12) {
+ print_field(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN,
+ ".amdhsa_round_robin_scheduling");
}
- if (IVersion.Major >= 12)
- PRINT_FIELD(OS, ".amdhsa_round_robin_scheduling", KD, compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
- PRINT_FIELD(
- OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
- PRINT_FIELD(OS, ".amdhsa_exception_fp_denorm_src", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
- PRINT_FIELD(
- OS, ".amdhsa_exception_fp_ieee_div_zero", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
- PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_overflow", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
- PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_underflow", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
- PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_inexact", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
- PRINT_FIELD(OS, ".amdhsa_exception_int_div_zero", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
-#undef PRINT_FIELD
+ print_field(
+ KD.compute_pgm_rsrc2,
+ amdhsa::
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
+ ".amdhsa_exception_fp_ieee_invalid_op");
+ print_field(
+ KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
+ ".amdhsa_exception_fp_denorm_src");
+ print_field(
+ KD.compute_pgm_rsrc2,
+ amdhsa::
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
+ ".amdhsa_exception_fp_ieee_div_zero");
+ print_field(
+ KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
+ ".amdhsa_exception_fp_ieee_overflow");
+ print_field(
+ KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
+ ".amdhsa_exception_fp_ieee_underflow");
+ print_field(
+ KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
+ ".amdhsa_exception_fp_ieee_inexact");
+ print_field(
+ KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
+ ".amdhsa_exception_int_div_zero");
OS << "\t.end_amdhsa_kernel\n";
}
@@ -819,7 +901,7 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
// Kernel descriptor symbol's type and size are fixed.
KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
KernelDescriptorSymbol->setSize(
- MCConstantExpr::create(sizeof(KernelDescriptor), Context));
+ MCConstantExpr::create(amdhsa::SIZEOF_KERNEL_DESCRIPTOR, Context));
// The visibility of the kernel code symbol must be protected or less to allow
// static relocations from the kernel descriptor to be used.
@@ -827,9 +909,12 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED);
Streamer.emitLabel(KernelDescriptorSymbol);
- Streamer.emitInt32(KernelDescriptor.group_segment_fixed_size);
- Streamer.emitInt32(KernelDescriptor.private_segment_fixed_size);
- Streamer.emitInt32(KernelDescriptor.kernarg_size);
+ Streamer.emitValue(KernelDescriptor.group_segment_fixed_size,
+ amdhsa::SIZEOF_GROUP_SEGMENT_FIXED_SIZE);
+ Streamer.emitValue(KernelDescriptor.private_segment_fixed_size,
+ amdhsa::SIZEOF_PRIVATE_SEGMENT_FIXED_SIZE);
+ Streamer.emitValue(KernelDescriptor.kernarg_size,
+ amdhsa::SIZEOF_KERNARG_SIZE);
for (uint8_t Res : KernelDescriptor.reserved0)
Streamer.emitInt8(Res);
@@ -838,20 +923,26 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
// expression being created is:
// (start of kernel code) - (start of kernel descriptor)
// It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
- Streamer.emitValue(MCBinaryExpr::createSub(
- MCSymbolRefExpr::create(
- KernelCodeSymbol, MCSymbolRefExpr::VK_AMDGPU_REL64, Context),
- MCSymbolRefExpr::create(
- KernelDescriptorSymbol, MCSymbolRefExpr::VK_None, Context),
- Context),
- sizeof(KernelDescriptor.kernel_code_entry_byte_offset));
+ Streamer.emitValue(
+ MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(KernelCodeSymbol,
+ MCSymbolRefExpr::VK_AMDGPU_REL64, Context),
+ MCSymbolRefExpr::create(KernelDescriptorSymbol,
+ MCSymbolRefExpr::VK_None, Context),
+ Context),
+ amdhsa::SIZEOF_KERNEL_CODE_ENTRY_BYTE_OFFSET);
for (uint8_t Res : KernelDescriptor.reserved1)
Streamer.emitInt8(Res);
- Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc3);
- Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc1);
- Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc2);
- Streamer.emitInt16(KernelDescriptor.kernel_code_properties);
- Streamer.emitInt16(KernelDescriptor.kernarg_preload);
+ Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc3,
+ amdhsa::SIZEOF_COMPUTE_PGM_RSRC3);
+ Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc1,
+ amdhsa::SIZEOF_COMPUTE_PGM_RSRC1);
+ Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc2,
+ amdhsa::SIZEOF_COMPUTE_PGM_RSRC2);
+ Streamer.emitValue(KernelDescriptor.kernel_code_properties,
+ amdhsa::SIZEOF_KERNEL_CODE_PROPERTIES);
+ Streamer.emitValue(KernelDescriptor.kernarg_preload,
+ amdhsa::SIZEOF_KERNARG_PRELOAD);
for (uint8_t Res : KernelDescriptor.reserved3)
Streamer.emitInt8(Res);
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDHSAKernelDescriptor.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDHSAKernelDescriptor.cpp
new file mode 100644
index 00000000000000..905723b7325bc8
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDHSAKernelDescriptor.cpp
@@ -0,0 +1,32 @@
+//===--- AMDHSAKernelDescriptor.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/AMDHSAKernelDescriptor.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+
+using namespace llvm;
+using namespace llvm::amdhsa;
+
+void kernel_descriptor_t::bits_set(const MCExpr *&Dst, const MCExpr *Value,
+ uint32_t Shift, uint32_t Mask,
+ MCContext &Ctx) {
+ auto Sft = MCConstantExpr::create(Shift, Ctx);
+ auto Msk = MCConstantExpr::create(Mask, Ctx);
+ Dst = MCBinaryExpr::createAnd(Dst, MCUnaryExpr::createNot(Msk, Ctx), Ctx);
+ Dst = MCBinaryExpr::createOr(Dst, MCBinaryExpr::createShl(Value, Sft, Ctx),
+ Ctx);
+}
+
+const MCExpr *kernel_descriptor_t::bits_get(const MCExpr *Src, uint32_t Shift,
+ uint32_t Mask, MCContext &Ctx) {
+ auto Sft = MCConstantExpr::create(Shift, Ctx);
+ auto Msk = MCConstantExpr::create(Mask, Ctx);
+ return MCBinaryExpr::createLShr(MCBinaryExpr::createAnd(Src, Msk, Ctx), Sft,
+ Ctx);
+}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
index 5dc76071b0594e..51df7bb3ebe840 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
@@ -7,6 +7,7 @@ add_llvm_component_library(LLVMAMDGPUDesc
AMDGPUMCCodeEmitter.cpp
AMDGPUMCTargetDesc.cpp
AMDGPUTargetStreamer.cpp
+ AMDHSAKernelDescriptor.cpp
R600InstPrinter.cpp
R600MCCodeEmitter.cpp
R600MCTargetDesc.cpp
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 33335ac75df768..4b37fc7569aa15 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -1186,44 +1187,65 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
}
}
-amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
- const MCSubtargetInfo *STI) {
+amdhsa::kernel_descriptor_t
+getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx) {
IsaVersion Version = getIsaVersion(STI->getCPU());
amdhsa::kernel_descriptor_t KD;
memset(&KD, 0, sizeof(KD));
-
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
- amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
- if (Version.Major >= 12) {
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, 0);
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_DISABLE_PERF, 0);
- } else {
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, 1);
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, 1);
+ const MCExpr *ZeroMCExpr = MCConstantExpr::create(0, Ctx);
+ const MCExpr *OneMCExpr = MCConstantExpr::create(1, Ctx);
+
+ KD.group_segment_fixed_size = ZeroMCExpr;
+ KD.private_segment_fixed_size = ZeroMCExpr;
+ KD.compute_pgm_rsrc1 = ZeroMCExpr;
+ KD.compute_pgm_rsrc2 = ZeroMCExpr;
+ KD.compute_pgm_rsrc3 = ZeroMCExpr;
+ KD.kernarg_size = ZeroMCExpr;
+ KD.kernel_code_properties = ZeroMCExpr;
+ KD.kernarg_preload = ZeroMCExpr;
+
+ amdhsa::kernel_descriptor_t::bits_set(
+ KD.compute_pgm_rsrc1,
+ MCConstantExpr::create(amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE, Ctx),
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Ctx);
+ if (Version.Major < 12) {
+ amdhsa::kernel_descriptor_t::bits_set(
+ KD.compute_pgm_rsrc1, OneMCExpr,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Ctx);
+ amdhsa::kernel_descriptor_t::bits_set(
+ KD.compute_pgm_rsrc1, OneMCExpr,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Ctx);
}
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
+ amdhsa::kernel_descriptor_t::bits_set(
+ KD.compute_pgm_rsrc2, OneMCExpr,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Ctx);
if (Version.Major >= 10) {
- AMDHSA_BITS_SET(KD.kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
- STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
- STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, 1);
- }
- if (AMDGPU::isGFX90A(*STI)) {
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,
- amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
- STI->getFeatureBits().test(FeatureTgSplit) ? 1 : 0);
+ if (STI->getFeatureBits().test(FeatureWavefrontSize32))
+ amdhsa::kernel_descriptor_t::bits_set(
+ KD.kernel_code_properties, OneMCExpr,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, Ctx);
+ if (!STI->getFeatureBits().test(FeatureCuMode))
+ amdhsa::kernel_descriptor_t::bits_set(
+ KD.compute_pgm_rsrc1, OneMCExpr,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Ctx);
+
+ amdhsa::kernel_descriptor_t::bits_set(
+ KD.compute_pgm_rsrc1, OneMCExpr,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Ctx);
}
+ if (AMDGPU::isGFX90A(*STI) && STI->getFeatureBits().test(FeatureTgSplit))
+ amdhsa::kernel_descriptor_t::bits_set(
+ KD.compute_pgm_rsrc3, OneMCExpr,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Ctx);
return KD;
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index f24b9f0e3615de..0567a2001fbb48 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -26,6 +26,7 @@ struct Align;
class Argument;
class Function;
class GlobalValue;
+class MCContext;
class MCInstrInfo;
class MCRegisterClass;
class MCRegisterInfo;
@@ -798,8 +799,8 @@ unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
const MCSubtargetInfo *STI);
-amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
- const MCSubtargetInfo *STI);
+amdhsa::kernel_descriptor_t
+getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx);
bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
diff --git a/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s b/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s
index 8b90e20bb87d1f..7b591904e877fd 100644
--- a/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s
+++ b/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s
@@ -29,7 +29,7 @@
// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0030 00000c60 80000000 00000000 00000000
+// OBJDUMP-NEXT: 0030 00000c60 80000000 00040000 00000000
// complete
// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
@@ -39,12 +39,12 @@
// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 00b0 00000060 80000000 00000000 00000000
+// OBJDUMP-NEXT: 00b0 00000060 80000000 00040000 00000000
// disabled_user_sgpr
// OBJDUMP-NEXT: 00c0 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 00d0 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 00e0 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 00f0 00000c60 80000000 00000000 00000000
+// OBJDUMP-NEXT: 00f0 00000c60 80000000 00040000 00000000
.text
// ASM: .text
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs.s
new file mode 100644
index 00000000000000..a2764d40655bc0
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs.s
@@ -0,0 +1,68 @@
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck --check-prefix=ASM %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=obj < %s > %t
+// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
+
+// When going from asm -> asm, the expressions should remain the same (i.e., symbolic).
+// When going from asm -> obj, the expressions should get resolved (through fixups),
+
+// OBJDUMP: Contents of section .rodata
+// expr_defined_later
+// OBJDUMP-NEXT: 0000 2b000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000
+// expr_defined
+// OBJDUMP-NEXT: 0040 2d000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0070 0000ac00 80000000 00000000 00000000
+
+.text
+// ASM: .text
+
+.amdhsa_code_object_version 4
+// ASM: .amdhsa_code_object_version 4
+
+.p2align 8
+.type expr_defined_later, at function
+expr_defined_later:
+ s_endpgm
+
+.p2align 8
+.type expr_defined, at function
+expr_defined:
+ s_endpgm
+
+.rodata
+// ASM: .rodata
+
+.p2align 6
+.amdhsa_kernel expr_defined_later
+ .amdhsa_group_segment_fixed_size defined_value+2
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+.end_amdhsa_kernel
+
+.set defined_value, 41
+
+.p2align 6
+.amdhsa_kernel expr_defined
+ .amdhsa_group_segment_fixed_size defined_value+4
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+.end_amdhsa_kernel
+
+
+
+// ASM: .amdhsa_kernel expr_defined_later
+// ASM: .amdhsa_group_segment_fixed_size defined_value+2
+// ASM: .end_amdhsa_kernel
+
+// ASM: .set defined_value, 41
+// ASM-NEXT: .no_dead_strip defined_value
+
+// ASM: .amdhsa_kernel expr_defined
+// ASM: .amdhsa_group_segment_fixed_size 45
+// ASM: .end_amdhsa_kernel
diff --git a/llvm/test/MC/AMDGPU/hsa-tg-split.s b/llvm/test/MC/AMDGPU/hsa-tg-split.s
new file mode 100644
index 00000000000000..5a4d3e2c279c50
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/hsa-tg-split.s
@@ -0,0 +1,74 @@
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack,+tgsplit < %s | FileCheck --check-prefix=ASM %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack,+tgsplit -filetype=obj < %s > %t
+// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
+
+// OBJDUMP: Contents of section .rodata
+// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000100
+// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000
+
+.text
+// ASM: .text
+
+.amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+"
+// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+"
+
+.amdhsa_code_object_version 4
+// ASM: .amdhsa_code_object_version 4
+
+.p2align 8
+.type minimal, at function
+minimal:
+ s_endpgm
+
+.rodata
+// ASM: .rodata
+
+.p2align 6
+.amdhsa_kernel minimal
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel minimal
+// ASM-NEXT: .amdhsa_group_segment_fixed_size 0
+// ASM-NEXT: .amdhsa_private_segment_fixed_size 0
+// ASM-NEXT: .amdhsa_kernarg_size 0
+// ASM-NEXT: .amdhsa_user_sgpr_count 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 0
+// ASM-NEXT: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM-NEXT: .amdhsa_accum_offset 4
+// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
+// ASM-NEXT: .amdhsa_float_round_mode_32 0
+// ASM-NEXT: .amdhsa_float_round_mode_16_64 0
+// ASM-NEXT: .amdhsa_float_denorm_mode_32 0
+// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3
+// ASM-NEXT: .amdhsa_dx10_clamp 1
+// ASM-NEXT: .amdhsa_ieee_mode 1
+// ASM-NEXT: .amdhsa_fp16_overflow 0
+// ASM-NEXT: .amdhsa_tg_split 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+// ASM-NEXT: .amdhsa_exception_fp_denorm_src 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+// ASM-NEXT: .amdhsa_exception_int_div_zero 0
+// ASM-NEXT: .end_amdhsa_kernel
More information about the llvm-commits
mailing list