[llvm] [AMDGPU] MCExpr-ify MC layer kernel descriptor (PR #80855)
Janek van Oirschot via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 9 03:03:17 PST 2024
https://github.com/JanekvO updated https://github.com/llvm/llvm-project/pull/80855
>From b3e77d2dcbd35ff442cb24dc907ed88dc42bd023 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Tue, 6 Feb 2024 15:29:26 +0000
Subject: [PATCH 1/3] MCExpr-ify MC layer kernel descriptor
---
.../llvm/Support/AMDHSAKernelDescriptor.h | 79 ++--
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 30 +-
.../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 188 +++++----
.../MCTargetDesc/AMDGPUTargetStreamer.cpp | 381 +++++++++++-------
.../MCTargetDesc/AMDHSAKernelDescriptor.cpp | 32 ++
.../Target/AMDGPU/MCTargetDesc/CMakeLists.txt | 1 +
.../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 84 ++--
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 5 +-
llvm/test/MC/AMDGPU/hsa-gfx12-v4.s | 6 +-
llvm/test/MC/AMDGPU/hsa-sym-exprs.s | 68 ++++
llvm/test/MC/AMDGPU/hsa-tg-split.s | 74 ++++
11 files changed, 641 insertions(+), 307 deletions(-)
create mode 100644 llvm/lib/Target/AMDGPU/MCTargetDesc/AMDHSAKernelDescriptor.cpp
create mode 100644 llvm/test/MC/AMDGPU/hsa-sym-exprs.s
create mode 100644 llvm/test/MC/AMDGPU/hsa-tg-split.s
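
For readers skimming the diff below: the core change is that the MC-layer kernel
descriptor fields become const MCExpr * instead of plain integers, so .amdhsa_*
directive operands may reference symbols that are defined later in the file;
values that can already be folded are still range-checked at parse time. A
minimal sketch of the pattern (not part of the patch; the symbol and function
names are illustrative):

  #include "llvm/MC/MCContext.h"
  #include "llvm/MC/MCExpr.h"
  using namespace llvm;

  // Build (Sym + 2). Before Sym is defined this stays symbolic; once the
  // assembler can resolve Sym, evaluateAsAbsolute folds it to a constant.
  const MCExpr *buildFieldValue(const MCSymbol *Sym, MCContext &Ctx) {
    const MCExpr *E = MCBinaryExpr::createAdd(
        MCSymbolRefExpr::create(Sym, Ctx), MCConstantExpr::create(2, Ctx), Ctx);
    int64_t Folded;
    if (E->evaluateAsAbsolute(Folded)) {
      // Resolvable now: the parser can range-check Folded immediately.
    }
    return E; // Otherwise the expression is emitted and fixed up later.
  }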
diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
index 84cac3ef700e0..9c5d8fa1c1a60 100644
--- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
+++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
@@ -52,6 +52,10 @@
#endif // AMDHSA_BITS_SET
namespace llvm {
+
+class MCContext;
+class MCExpr;
+
namespace amdhsa {
// Floating point rounding modes. Must match hardware definition.
@@ -238,18 +242,40 @@ enum : int32_t {
// Kernel descriptor. Must be kept backwards compatible.
struct kernel_descriptor_t {
- uint32_t group_segment_fixed_size;
- uint32_t private_segment_fixed_size;
- uint32_t kernarg_size;
+ const MCExpr *group_segment_fixed_size;
+ const MCExpr *private_segment_fixed_size;
+ const MCExpr *kernarg_size;
uint8_t reserved0[4];
int64_t kernel_code_entry_byte_offset;
uint8_t reserved1[20];
- uint32_t compute_pgm_rsrc3; // GFX10+ and GFX90A+
- uint32_t compute_pgm_rsrc1;
- uint32_t compute_pgm_rsrc2;
- uint16_t kernel_code_properties;
- uint16_t kernarg_preload;
+ const MCExpr *compute_pgm_rsrc3; // GFX10+ and GFX90A+
+ const MCExpr *compute_pgm_rsrc1;
+ const MCExpr *compute_pgm_rsrc2;
+ const MCExpr *kernel_code_properties;
+ const MCExpr *kernarg_preload;
uint8_t reserved3[4];
+
+ static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift,
+ uint32_t Mask, MCContext &Ctx);
+ static const MCExpr *bits_get(const MCExpr *Src, uint32_t Shift,
+ uint32_t Mask, MCContext &Ctx);
+};
+
+// Sizes for kernel_descriptor_t properties, should add up to 64.
+enum : uint32_t {
+ SIZEOF_GROUP_SEGMENT_FIXED_SIZE = 4,
+ SIZEOF_PRIVATE_SEGMENT_FIXED_SIZE = 4,
+ SIZEOF_KERNARG_SIZE = 4,
+ SIZEOF_RESERVED0 = 4,
+ SIZEOF_KERNEL_CODE_ENTRY_BYTE_OFFSET = 8,
+ SIZEOF_RESERVED1 = 20,
+ SIZEOF_COMPUTE_PGM_RSRC3 = 4,
+ SIZEOF_COMPUTE_PGM_RSRC1 = 4,
+ SIZEOF_COMPUTE_PGM_RSRC2 = 4,
+ SIZEOF_KERNEL_CODE_PROPERTIES = 2,
+ SIZEOF_KERNARG_PRELOAD = 2,
+ SIZEOF_RESERVED3 = 4,
+ SIZEOF_KERNEL_DESCRIPTOR = 64
};
enum : uint32_t {
@@ -267,43 +293,6 @@ enum : uint32_t {
RESERVED3_OFFSET = 60
};
-static_assert(
- sizeof(kernel_descriptor_t) == 64,
- "invalid size for kernel_descriptor_t");
-static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) ==
- GROUP_SEGMENT_FIXED_SIZE_OFFSET,
- "invalid offset for group_segment_fixed_size");
-static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) ==
- PRIVATE_SEGMENT_FIXED_SIZE_OFFSET,
- "invalid offset for private_segment_fixed_size");
-static_assert(offsetof(kernel_descriptor_t, kernarg_size) ==
- KERNARG_SIZE_OFFSET,
- "invalid offset for kernarg_size");
-static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET,
- "invalid offset for reserved0");
-static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) ==
- KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET,
- "invalid offset for kernel_code_entry_byte_offset");
-static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET,
- "invalid offset for reserved1");
-static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) ==
- COMPUTE_PGM_RSRC3_OFFSET,
- "invalid offset for compute_pgm_rsrc3");
-static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) ==
- COMPUTE_PGM_RSRC1_OFFSET,
- "invalid offset for compute_pgm_rsrc1");
-static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) ==
- COMPUTE_PGM_RSRC2_OFFSET,
- "invalid offset for compute_pgm_rsrc2");
-static_assert(offsetof(kernel_descriptor_t, kernel_code_properties) ==
- KERNEL_CODE_PROPERTIES_OFFSET,
- "invalid offset for kernel_code_properties");
-static_assert(offsetof(kernel_descriptor_t, kernarg_preload) ==
- KERNARG_PRELOAD_OFFSET,
- "invalid offset for kernarg_preload");
-static_assert(offsetof(kernel_descriptor_t, reserved3) == RESERVED3_OFFSET,
- "invalid offset for reserved3");
-
} // end namespace amdhsa
} // end namespace llvm
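
With MCExpr pointers in the struct, the old sizeof/offsetof static_asserts no
longer describe the emitted layout, so the byte sizes move into the SIZEOF_*
enum above. As a sanity check (illustrative only, not part of the patch), the
entries do add up to the 64-byte descriptor:

  static_assert(SIZEOF_GROUP_SEGMENT_FIXED_SIZE +
                        SIZEOF_PRIVATE_SEGMENT_FIXED_SIZE + SIZEOF_KERNARG_SIZE +
                        SIZEOF_RESERVED0 + SIZEOF_KERNEL_CODE_ENTRY_BYTE_OFFSET +
                        SIZEOF_RESERVED1 + SIZEOF_COMPUTE_PGM_RSRC3 +
                        SIZEOF_COMPUTE_PGM_RSRC1 + SIZEOF_COMPUTE_PGM_RSRC2 +
                        SIZEOF_KERNEL_CODE_PROPERTIES + SIZEOF_KERNARG_PRELOAD +
                        SIZEOF_RESERVED3 ==
                    SIZEOF_KERNEL_DESCRIPTOR,
                "kernel descriptor property sizes must cover all 64 bytes");

(4 + 4 + 4 + 4 + 8 + 20 + 4 + 4 + 4 + 2 + 2 + 4 = 64.)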
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index db81e1ee9e389..d68c7e499f62c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -434,24 +434,30 @@ amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
assert(isUInt<32>(PI.getComputePGMRSrc1(STM)));
assert(isUInt<32>(PI.getComputePGMRSrc2()));
- KernelDescriptor.group_segment_fixed_size = PI.LDSSize;
- KernelDescriptor.private_segment_fixed_size = PI.ScratchSize;
+ KernelDescriptor.group_segment_fixed_size =
+ MCConstantExpr::create(PI.LDSSize, MF.getContext());
+ KernelDescriptor.private_segment_fixed_size =
+ MCConstantExpr::create(PI.ScratchSize, MF.getContext());
Align MaxKernArgAlign;
- KernelDescriptor.kernarg_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
+ KernelDescriptor.kernarg_size = MCConstantExpr::create(
+ STM.getKernArgSegmentSize(F, MaxKernArgAlign), MF.getContext());
- KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1(STM);
- KernelDescriptor.compute_pgm_rsrc2 = PI.getComputePGMRSrc2();
- KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);
+ KernelDescriptor.compute_pgm_rsrc1 =
+ MCConstantExpr::create(PI.getComputePGMRSrc1(STM), MF.getContext());
+ KernelDescriptor.compute_pgm_rsrc2 =
+ MCConstantExpr::create(PI.getComputePGMRSrc2(), MF.getContext());
+ KernelDescriptor.kernel_code_properties = MCConstantExpr::create(
+ getAmdhsaKernelCodeProperties(MF), MF.getContext());
assert(STM.hasGFX90AInsts() || CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0);
- if (STM.hasGFX90AInsts())
- KernelDescriptor.compute_pgm_rsrc3 =
- CurrentProgramInfo.ComputePGMRSrc3GFX90A;
+ KernelDescriptor.compute_pgm_rsrc3 = MCConstantExpr::create(
+ STM.hasGFX90AInsts() ? CurrentProgramInfo.ComputePGMRSrc3GFX90A : 0,
+ MF.getContext());
- if (AMDGPU::hasKernargPreload(STM))
- KernelDescriptor.kernarg_preload =
- static_cast<uint16_t>(Info->getNumKernargPreloadedSGPRs());
+ KernelDescriptor.kernarg_preload = MCConstantExpr::create(
+ AMDGPU::hasKernargPreload(STM) ? Info->getNumKernargPreloadedSGPRs() : 0,
+ MF.getContext());
return KernelDescriptor;
}
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 225e781588668..2331af628fb73 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5236,7 +5236,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (getParser().parseIdentifier(KernelName))
return true;
- kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
+ kernel_descriptor_t KD =
+ getDefaultAmdhsaKernelDescriptor(&getSTI(), getContext());
StringSet<> Seen;
@@ -5276,89 +5277,107 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
return TokError(".amdhsa_ directives cannot be repeated");
SMLoc ValStart = getLoc();
- int64_t IVal;
- if (getParser().parseAbsoluteExpression(IVal))
+ const MCExpr *ExprVal;
+ if (getParser().parseExpression(ExprVal))
return true;
SMLoc ValEnd = getLoc();
SMRange ValRange = SMRange(ValStart, ValEnd);
- if (IVal < 0)
- return OutOfRangeError(ValRange);
-
+ int64_t IVal = 0;
uint64_t Val = IVal;
+ bool EvaluatableExpr;
+ if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
+ if (IVal < 0)
+ return OutOfRangeError(ValRange);
+ Val = IVal;
+ }
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
- if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
+ if (!isUInt<ENTRY##_WIDTH>(Val)) \
return OutOfRangeError(RANGE); \
- AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
+ kernel_descriptor_t::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
+ getContext());
+
+#define EXPR_SHOULD_RESOLVE() \
+ if (!EvaluatableExpr) \
+ return Error(IDRange.Start, "directive should have resolvable expression", \
+ IDRange);
if (ID == ".amdhsa_group_segment_fixed_size") {
- if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
+ if (!isUInt<SIZEOF_GROUP_SEGMENT_FIXED_SIZE * CHAR_BIT>(Val))
return OutOfRangeError(ValRange);
- KD.group_segment_fixed_size = Val;
+ KD.group_segment_fixed_size = ExprVal;
} else if (ID == ".amdhsa_private_segment_fixed_size") {
- if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
+ if (!isUInt<SIZEOF_PRIVATE_SEGMENT_FIXED_SIZE * CHAR_BIT>(Val))
return OutOfRangeError(ValRange);
- KD.private_segment_fixed_size = Val;
+ KD.private_segment_fixed_size = ExprVal;
} else if (ID == ".amdhsa_kernarg_size") {
- if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
+ if (!isUInt<SIZEOF_KERNARG_SIZE * CHAR_BIT>(Val))
return OutOfRangeError(ValRange);
- KD.kernarg_size = Val;
+ KD.kernarg_size = ExprVal;
} else if (ID == ".amdhsa_user_sgpr_count") {
+ EXPR_SHOULD_RESOLVE();
ExplicitUserSGPRCount = Val;
} else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
+ EXPR_SHOULD_RESOLVE();
if (hasArchitectedFlatScratch())
return Error(IDRange.Start,
"directive is not supported with architected flat scratch",
IDRange);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
- Val, ValRange);
+ ExprVal, ValRange);
if (Val)
ImpliedUserSGPRCount += 4;
} else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
+ EXPR_SHOULD_RESOLVE();
if (!hasKernargPreload())
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
if (Val > getMaxNumUserSGPRs())
return OutOfRangeError(ValRange);
- PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, Val,
+ PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
ValRange);
if (Val) {
ImpliedUserSGPRCount += Val;
PreloadLength = Val;
}
} else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
+ EXPR_SHOULD_RESOLVE();
if (!hasKernargPreload())
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
if (Val >= 1024)
return OutOfRangeError(ValRange);
- PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, Val,
+ PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
ValRange);
if (Val)
PreloadOffset = Val;
} else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
+ EXPR_SHOULD_RESOLVE();
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
+ EXPR_SHOULD_RESOLVE();
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
+ EXPR_SHOULD_RESOLVE();
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
- Val, ValRange);
+ ExprVal, ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
+ EXPR_SHOULD_RESOLVE();
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
@@ -5367,34 +5386,39 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
return Error(IDRange.Start,
"directive is not supported with architected flat scratch",
IDRange);
+ EXPR_SHOULD_RESOLVE();
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
- ValRange);
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
+ ExprVal, ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
+ EXPR_SHOULD_RESOLVE();
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
- Val, ValRange);
+ ExprVal, ValRange);
if (Val)
ImpliedUserSGPRCount += 1;
} else if (ID == ".amdhsa_wavefront_size32") {
+ EXPR_SHOULD_RESOLVE();
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
EnableWavefrontSize32 = Val;
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
- Val, ValRange);
+ KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_uses_dynamic_stack") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
+ KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
if (hasArchitectedFlatScratch())
return Error(IDRange.Start,
"directive is not supported with architected flat scratch",
IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
+ COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_enable_private_segment") {
if (!hasArchitectedFlatScratch())
return Error(
@@ -5402,42 +5426,48 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
"directive is not supported without architected flat scratch",
IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
+ COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
ValRange);
} else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
ValRange);
} else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
ValRange);
} else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
ValRange);
} else if (ID == ".amdhsa_system_vgpr_workitem_id") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
ValRange);
} else if (ID == ".amdhsa_next_free_vgpr") {
+ EXPR_SHOULD_RESOLVE();
VGPRRange = ValRange;
NextFreeVGPR = Val;
} else if (ID == ".amdhsa_next_free_sgpr") {
+ EXPR_SHOULD_RESOLVE();
SGPRRange = ValRange;
NextFreeSGPR = Val;
} else if (ID == ".amdhsa_accum_offset") {
if (!isGFX90A())
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
+ EXPR_SHOULD_RESOLVE();
AccumOffset = Val;
} else if (ID == ".amdhsa_reserve_vcc") {
+ EXPR_SHOULD_RESOLVE();
if (!isUInt<1>(Val))
return OutOfRangeError(ValRange);
ReserveVCC = Val;
} else if (ID == ".amdhsa_reserve_flat_scratch") {
+ EXPR_SHOULD_RESOLVE();
if (IVersion.Major < 7)
return Error(IDRange.Start, "directive requires gfx7+", IDRange);
if (hasArchitectedFlatScratch())
@@ -5457,97 +5487,105 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
IDRange);
} else if (ID == ".amdhsa_float_round_mode_32") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
+ COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_float_round_mode_16_64") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
+ COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_float_denorm_mode_32") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
+ COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_float_denorm_mode_16_64") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
+ COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
ValRange);
} else if (ID == ".amdhsa_dx10_clamp") {
if (IVersion.Major >= 12)
return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Val,
+ COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
ValRange);
} else if (ID == ".amdhsa_ieee_mode") {
if (IVersion.Major >= 12)
return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Val,
+ COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
ValRange);
} else if (ID == ".amdhsa_fp16_overflow") {
if (IVersion.Major < 9)
return Error(IDRange.Start, "directive requires gfx9+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
ValRange);
} else if (ID == ".amdhsa_tg_split") {
if (!isGFX90A())
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
- ValRange);
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
+ ExprVal, ValRange);
} else if (ID == ".amdhsa_workgroup_processor_mode") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
ValRange);
} else if (ID == ".amdhsa_memory_ordered") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
ValRange);
} else if (ID == ".amdhsa_forward_progress") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
ValRange);
} else if (ID == ".amdhsa_shared_vgpr_count") {
+ EXPR_SHOULD_RESOLVE();
if (IVersion.Major < 10 || IVersion.Major >= 12)
return Error(IDRange.Start, "directive requires gfx10 or gfx11",
IDRange);
SharedVGPRCount = Val;
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
- COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, Val,
+ COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
ValRange);
} else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
PARSE_BITS_ENTRY(
KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
- ValRange);
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
+ ExprVal, ValRange);
} else if (ID == ".amdhsa_exception_fp_denorm_src") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
- Val, ValRange);
+ ExprVal, ValRange);
} else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
PARSE_BITS_ENTRY(
KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
- ValRange);
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
+ ExprVal, ValRange);
} else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
- Val, ValRange);
+ ExprVal, ValRange);
} else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
- Val, ValRange);
+ ExprVal, ValRange);
} else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
- Val, ValRange);
+ ExprVal, ValRange);
} else if (ID == ".amdhsa_exception_int_div_zero") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
- Val, ValRange);
+ ExprVal, ValRange);
} else if (ID == ".amdhsa_round_robin_scheduling") {
if (IVersion.Major < 12)
return Error(IDRange.Start, "directive requires gfx12+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, Val,
+ COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
ValRange);
} else {
return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
@@ -5574,15 +5612,18 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
VGPRBlocks))
return OutOfRangeError(VGPRRange);
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
+ kernel_descriptor_t::bits_set(
+ KD.compute_pgm_rsrc1, MCConstantExpr::create(VGPRBlocks, getContext()),
+ COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
+ COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
SGPRBlocks))
return OutOfRangeError(SGPRRange);
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
- SGPRBlocks);
+ kernel_descriptor_t::bits_set(
+ KD.compute_pgm_rsrc1, MCConstantExpr::create(SGPRBlocks, getContext()),
+ COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
+ COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
return TokError("amdgpu_user_sgpr_count smaller than than implied by "
@@ -5593,11 +5634,17 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
return TokError("too many user SGPRs enabled");
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
- UserSGPRCount);
-
- if (PreloadLength && KD.kernarg_size &&
- (PreloadLength * 4 + PreloadOffset * 4 > KD.kernarg_size))
+ kernel_descriptor_t::bits_set(
+ KD.compute_pgm_rsrc2, MCConstantExpr::create(UserSGPRCount, getContext()),
+ COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
+ COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
+
+ int64_t IVal = 0;
+ if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
+ return TokError("Kernarg size should be resolvable");
+ uint64_t kernarg_size = IVal;
+ if (PreloadLength && kernarg_size &&
+ (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
return TokError("Kernarg preload length + offset is larger than the "
"kernarg segment size");
@@ -5609,8 +5656,11 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
"increments of 4");
if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
return TokError("accum_offset exceeds total VGPR allocation");
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
- (AccumOffset / 4 - 1));
+ kernel_descriptor_t::bits_set(
+ KD.compute_pgm_rsrc3,
+ MCConstantExpr::create(AccumOffset / 4 - 1, getContext()),
+ COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
+ COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
}
if (IVersion.Major >= 10 && IVersion.Major < 12) {
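
Condensed, the new parse path in this hunk works as sketched below (illustrative
summary, not additional code in the patch). Directives whose value feeds later
computations (user SGPR counts, next_free_vgpr/sgpr, accum_offset, ...) use
EXPR_SHOULD_RESOLVE to insist on a foldable expression; pure descriptor bit
fields may stay symbolic, in which case the isUInt width checks see Val == 0 and
are effectively deferred to object emission.

  const MCExpr *ExprVal;
  if (getParser().parseExpression(ExprVal))
    return true;
  int64_t IVal = 0;
  uint64_t Val = 0;
  bool EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal);
  if (EvaluatableExpr) {
    if (IVal < 0)
      return OutOfRangeError(ValRange);
    Val = IVal; // only folded values are range-checked at parse time
  }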
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 5e9b1674d87dc..b2d0657e49f07 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -302,91 +302,142 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
bool ReserveVCC, bool ReserveFlatScr) {
IsaVersion IVersion = getIsaVersion(STI.getCPU());
+ const MCAsmInfo *MAI = getContext().getAsmInfo();
OS << "\t.amdhsa_kernel " << KernelName << '\n';
-#define PRINT_FIELD(STREAM, DIRECTIVE, KERNEL_DESC, MEMBER_NAME, FIELD_NAME) \
- STREAM << "\t\t" << DIRECTIVE << " " \
- << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n';
-
- OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size
- << '\n';
- OS << "\t\t.amdhsa_private_segment_fixed_size "
- << KD.private_segment_fixed_size << '\n';
- OS << "\t\t.amdhsa_kernarg_size " << KD.kernarg_size << '\n';
-
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_count", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT);
+ auto print_field = [&](const MCExpr *Expr, uint32_t Shift, uint32_t Mask,
+ StringRef Directive) {
+ int64_t IVal;
+ OS << "\t\t" << Directive << " ";
+ const MCExpr *pgm_rsrc1_bits =
+ amdhsa::kernel_descriptor_t::bits_get(Expr, Shift, Mask, getContext());
+ if (pgm_rsrc1_bits->evaluateAsAbsolute(IVal)) {
+ OS << static_cast<uint64_t>(IVal);
+ } else {
+ pgm_rsrc1_bits->print(OS, MAI);
+ }
+ OS << '\n';
+ };
+
+ OS << "\t\t.amdhsa_group_segment_fixed_size ";
+ KD.group_segment_fixed_size->print(OS, MAI);
+ OS << '\n';
+
+ OS << "\t\t.amdhsa_private_segment_fixed_size ";
+ KD.private_segment_fixed_size->print(OS, MAI);
+ OS << '\n';
+
+ OS << "\t\t.amdhsa_kernarg_size ";
+ KD.kernarg_size->print(OS, MAI);
+ OS << '\n';
+
+ print_field(
+ KD.compute_pgm_rsrc2, amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, ".amdhsa_user_sgpr_count");
if (!hasArchitectedFlatScratch(STI))
- PRINT_FIELD(
- OS, ".amdhsa_user_sgpr_private_segment_buffer", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_queue_ptr", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_id", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
+ print_field(
+ KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
+ ".amdhsa_user_sgpr_private_segment_buffer");
+ print_field(KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR,
+ ".amdhsa_user_sgpr_dispatch_ptr");
+ print_field(KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR,
+ ".amdhsa_user_sgpr_queue_ptr");
+ print_field(
+ KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
+ ".amdhsa_user_sgpr_kernarg_segment_ptr");
+ print_field(KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID,
+ ".amdhsa_user_sgpr_dispatch_id");
if (!hasArchitectedFlatScratch(STI))
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_flat_scratch_init", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
+ print_field(
+ KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
+ ".amdhsa_user_sgpr_flat_scratch_init");
if (hasKernargPreload(STI)) {
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_preload_length ", KD,
- kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH);
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_preload_offset ", KD,
- kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET);
+ print_field(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH_SHIFT,
+ amdhsa::KERNARG_PRELOAD_SPEC_LENGTH,
+ ".amdhsa_user_sgpr_kernarg_preload_length");
+ print_field(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET_SHIFT,
+ amdhsa::KERNARG_PRELOAD_SPEC_OFFSET,
+ ".amdhsa_user_sgpr_kernarg_preload_offset");
}
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
+ print_field(
+ KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
+ ".amdhsa_user_sgpr_private_segment_size");
if (IVersion.Major >= 10)
- PRINT_FIELD(OS, ".amdhsa_wavefront_size32", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
+ print_field(KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
+ ".amdhsa_wavefront_size32");
if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
- PRINT_FIELD(OS, ".amdhsa_uses_dynamic_stack", KD, kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
- PRINT_FIELD(OS,
+ print_field(KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK,
+ ".amdhsa_uses_dynamic_stack");
+ print_field(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT,
(hasArchitectedFlatScratch(STI)
? ".amdhsa_enable_private_segment"
- : ".amdhsa_system_sgpr_private_segment_wavefront_offset"),
- KD, compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
- PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
- PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
- PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
- PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_info", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
- PRINT_FIELD(OS, ".amdhsa_system_vgpr_workitem_id", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
+ : ".amdhsa_system_sgpr_private_segment_wavefront_offset"));
+ print_field(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X,
+ ".amdhsa_system_sgpr_workgroup_id_x");
+ print_field(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y,
+ ".amdhsa_system_sgpr_workgroup_id_y");
+ print_field(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z,
+ ".amdhsa_system_sgpr_workgroup_id_z");
+ print_field(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO,
+ ".amdhsa_system_sgpr_workgroup_info");
+ print_field(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID,
+ ".amdhsa_system_vgpr_workitem_id");
// These directives are required.
OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n';
OS << "\t\t.amdhsa_next_free_sgpr " << NextSGPR << '\n';
- if (AMDGPU::isGFX90A(STI))
- OS << "\t\t.amdhsa_accum_offset " <<
- (AMDHSA_BITS_GET(KD.compute_pgm_rsrc3,
- amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
- << '\n';
+ if (AMDGPU::isGFX90A(STI)) {
+ // MCExpr equivalent of taking the (accum_offset + 1) * 4.
+ const MCExpr *accum_bits = amdhsa::kernel_descriptor_t::bits_get(
+ KD.compute_pgm_rsrc3,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
+ accum_bits = MCBinaryExpr::createAdd(
+ accum_bits, MCConstantExpr::create(1, getContext()), getContext());
+ accum_bits = MCBinaryExpr::createMul(
+ accum_bits, MCConstantExpr::create(4, getContext()), getContext());
+ OS << "\t\t.amdhsa_accum_offset ";
+ int64_t IVal;
+ if (accum_bits->evaluateAsAbsolute(IVal)) {
+ OS << static_cast<uint64_t>(IVal);
+ } else {
+ accum_bits->print(OS, MAI);
+ }
+ OS << '\n';
+ }
if (!ReserveVCC)
OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n';
@@ -403,74 +454,105 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
break;
}
- PRINT_FIELD(OS, ".amdhsa_float_round_mode_32", KD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
- PRINT_FIELD(OS, ".amdhsa_float_round_mode_16_64", KD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
- PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_32", KD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
- PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_16_64", KD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
+ print_field(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32,
+ ".amdhsa_float_round_mode_32");
+ print_field(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64,
+ ".amdhsa_float_round_mode_16_64");
+ print_field(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32,
+ ".amdhsa_float_denorm_mode_32");
+ print_field(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
+ ".amdhsa_float_denorm_mode_16_64");
if (IVersion.Major < 12) {
- PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD, compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
- PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD, compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
+ print_field(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP,
+ ".amdhsa_dx10_clamp");
+ print_field(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE,
+ ".amdhsa_ieee_mode");
+ }
+ if (IVersion.Major >= 9) {
+ print_field(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL,
+ ".amdhsa_fp16_overflow");
}
- if (IVersion.Major >= 9)
- PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
if (AMDGPU::isGFX90A(STI))
- PRINT_FIELD(OS, ".amdhsa_tg_split", KD,
- compute_pgm_rsrc3,
- amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
+ print_field(KD.compute_pgm_rsrc3,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, ".amdhsa_tg_split");
if (IVersion.Major >= 10) {
- PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
- PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
- PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
+ print_field(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
+ ".amdhsa_workgroup_processor_mode");
+ print_field(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED,
+ ".amdhsa_memory_ordered");
+ print_field(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS,
+ ".amdhsa_forward_progress");
}
if (IVersion.Major >= 10 && IVersion.Major < 12) {
- PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3,
- amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
+ print_field(KD.compute_pgm_rsrc3,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT,
+ ".amdhsa_shared_vgpr_count");
+ }
+ if (IVersion.Major >= 12) {
+ print_field(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN,
+ ".amdhsa_round_robin_scheduling");
}
- if (IVersion.Major >= 12)
- PRINT_FIELD(OS, ".amdhsa_round_robin_scheduling", KD, compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
- PRINT_FIELD(
- OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
- PRINT_FIELD(OS, ".amdhsa_exception_fp_denorm_src", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
- PRINT_FIELD(
- OS, ".amdhsa_exception_fp_ieee_div_zero", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
- PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_overflow", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
- PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_underflow", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
- PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_inexact", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
- PRINT_FIELD(OS, ".amdhsa_exception_int_div_zero", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
-#undef PRINT_FIELD
+ print_field(
+ KD.compute_pgm_rsrc2,
+ amdhsa::
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
+ ".amdhsa_exception_fp_ieee_invalid_op");
+ print_field(
+ KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
+ ".amdhsa_exception_fp_denorm_src");
+ print_field(
+ KD.compute_pgm_rsrc2,
+ amdhsa::
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
+ ".amdhsa_exception_fp_ieee_div_zero");
+ print_field(
+ KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
+ ".amdhsa_exception_fp_ieee_overflow");
+ print_field(
+ KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
+ ".amdhsa_exception_fp_ieee_underflow");
+ print_field(
+ KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
+ ".amdhsa_exception_fp_ieee_inexact");
+ print_field(
+ KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
+ ".amdhsa_exception_int_div_zero");
OS << "\t.end_amdhsa_kernel\n";
}
@@ -819,7 +901,7 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
// Kernel descriptor symbol's type and size are fixed.
KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
KernelDescriptorSymbol->setSize(
- MCConstantExpr::create(sizeof(KernelDescriptor), Context));
+ MCConstantExpr::create(amdhsa::SIZEOF_KERNEL_DESCRIPTOR, Context));
// The visibility of the kernel code symbol must be protected or less to allow
// static relocations from the kernel descriptor to be used.
@@ -827,9 +909,12 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED);
Streamer.emitLabel(KernelDescriptorSymbol);
- Streamer.emitInt32(KernelDescriptor.group_segment_fixed_size);
- Streamer.emitInt32(KernelDescriptor.private_segment_fixed_size);
- Streamer.emitInt32(KernelDescriptor.kernarg_size);
+ Streamer.emitValue(KernelDescriptor.group_segment_fixed_size,
+ amdhsa::SIZEOF_GROUP_SEGMENT_FIXED_SIZE);
+ Streamer.emitValue(KernelDescriptor.private_segment_fixed_size,
+ amdhsa::SIZEOF_PRIVATE_SEGMENT_FIXED_SIZE);
+ Streamer.emitValue(KernelDescriptor.kernarg_size,
+ amdhsa::SIZEOF_KERNARG_SIZE);
for (uint8_t Res : KernelDescriptor.reserved0)
Streamer.emitInt8(Res);
@@ -838,20 +923,26 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
// expression being created is:
// (start of kernel code) - (start of kernel descriptor)
// It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
- Streamer.emitValue(MCBinaryExpr::createSub(
- MCSymbolRefExpr::create(
- KernelCodeSymbol, MCSymbolRefExpr::VK_AMDGPU_REL64, Context),
- MCSymbolRefExpr::create(
- KernelDescriptorSymbol, MCSymbolRefExpr::VK_None, Context),
- Context),
- sizeof(KernelDescriptor.kernel_code_entry_byte_offset));
+ Streamer.emitValue(
+ MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(KernelCodeSymbol,
+ MCSymbolRefExpr::VK_AMDGPU_REL64, Context),
+ MCSymbolRefExpr::create(KernelDescriptorSymbol,
+ MCSymbolRefExpr::VK_None, Context),
+ Context),
+ amdhsa::SIZEOF_KERNEL_CODE_ENTRY_BYTE_OFFSET);
for (uint8_t Res : KernelDescriptor.reserved1)
Streamer.emitInt8(Res);
- Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc3);
- Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc1);
- Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc2);
- Streamer.emitInt16(KernelDescriptor.kernel_code_properties);
- Streamer.emitInt16(KernelDescriptor.kernarg_preload);
+ Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc3,
+ amdhsa::SIZEOF_COMPUTE_PGM_RSRC3);
+ Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc1,
+ amdhsa::SIZEOF_COMPUTE_PGM_RSRC1);
+ Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc2,
+ amdhsa::SIZEOF_COMPUTE_PGM_RSRC2);
+ Streamer.emitValue(KernelDescriptor.kernel_code_properties,
+ amdhsa::SIZEOF_KERNEL_CODE_PROPERTIES);
+ Streamer.emitValue(KernelDescriptor.kernarg_preload,
+ amdhsa::SIZEOF_KERNARG_PRELOAD);
for (uint8_t Res : KernelDescriptor.reserved3)
Streamer.emitInt8(Res);
}
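
A note on the accum_offset round trip above: the parser stores
.amdhsa_accum_offset N as N/4 - 1 in COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, and
the asm streamer prints it back as (field + 1) * 4. For the value used in the
new tests, .amdhsa_accum_offset 4 is stored as 4/4 - 1 = 0 and printed back as
(0 + 1) * 4 = 4.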
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDHSAKernelDescriptor.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDHSAKernelDescriptor.cpp
new file mode 100644
index 0000000000000..905723b7325bc
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDHSAKernelDescriptor.cpp
@@ -0,0 +1,32 @@
+//===--- AMDHSAKernelDescriptor.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/AMDHSAKernelDescriptor.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+
+using namespace llvm;
+using namespace llvm::amdhsa;
+
+void kernel_descriptor_t::bits_set(const MCExpr *&Dst, const MCExpr *Value,
+ uint32_t Shift, uint32_t Mask,
+ MCContext &Ctx) {
+ auto Sft = MCConstantExpr::create(Shift, Ctx);
+ auto Msk = MCConstantExpr::create(Mask, Ctx);
+ Dst = MCBinaryExpr::createAnd(Dst, MCUnaryExpr::createNot(Msk, Ctx), Ctx);
+ Dst = MCBinaryExpr::createOr(Dst, MCBinaryExpr::createShl(Value, Sft, Ctx),
+ Ctx);
+}
+
+const MCExpr *kernel_descriptor_t::bits_get(const MCExpr *Src, uint32_t Shift,
+ uint32_t Mask, MCContext &Ctx) {
+ auto Sft = MCConstantExpr::create(Shift, Ctx);
+ auto Msk = MCConstantExpr::create(Mask, Ctx);
+ return MCBinaryExpr::createLShr(MCBinaryExpr::createAnd(Src, Msk, Ctx), Sft,
+ Ctx);
+}
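
The two helpers above build the usual read-modify-write bit patterns as MCExpr
trees: bits_set computes (Dst & ~Mask) | (Value << Shift) and bits_get computes
(Src & Mask) >> Shift. A small usage sketch (illustrative only; assumes an
MCContext &Ctx and the amdhsa:: field macros from AMDHSAKernelDescriptor.h are
in scope):

  const MCExpr *Rsrc2 = MCConstantExpr::create(0, Ctx);
  amdhsa::kernel_descriptor_t::bits_set(
      Rsrc2, MCConstantExpr::create(5, Ctx),
      amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, Ctx);
  const MCExpr *Count = amdhsa::kernel_descriptor_t::bits_get(
      Rsrc2, amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, Ctx);
  int64_t V;
  if (Count->evaluateAsAbsolute(V))
    assert(V == 5); // folds to 5 because every leaf is a constant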
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
index 5dc76071b0594..51df7bb3ebe84 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
@@ -7,6 +7,7 @@ add_llvm_component_library(LLVMAMDGPUDesc
AMDGPUMCCodeEmitter.cpp
AMDGPUMCTargetDesc.cpp
AMDGPUTargetStreamer.cpp
+ AMDHSAKernelDescriptor.cpp
R600InstPrinter.cpp
R600MCCodeEmitter.cpp
R600MCTargetDesc.cpp
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 33335ac75df76..4b37fc7569aa1 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -1186,44 +1187,65 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
}
}
-amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
- const MCSubtargetInfo *STI) {
+amdhsa::kernel_descriptor_t
+getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx) {
IsaVersion Version = getIsaVersion(STI->getCPU());
amdhsa::kernel_descriptor_t KD;
memset(&KD, 0, sizeof(KD));
-
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
- amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
- if (Version.Major >= 12) {
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, 0);
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_DISABLE_PERF, 0);
- } else {
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, 1);
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, 1);
+ const MCExpr *ZeroMCExpr = MCConstantExpr::create(0, Ctx);
+ const MCExpr *OneMCExpr = MCConstantExpr::create(1, Ctx);
+
+ KD.group_segment_fixed_size = ZeroMCExpr;
+ KD.private_segment_fixed_size = ZeroMCExpr;
+ KD.compute_pgm_rsrc1 = ZeroMCExpr;
+ KD.compute_pgm_rsrc2 = ZeroMCExpr;
+ KD.compute_pgm_rsrc3 = ZeroMCExpr;
+ KD.kernarg_size = ZeroMCExpr;
+ KD.kernel_code_properties = ZeroMCExpr;
+ KD.kernarg_preload = ZeroMCExpr;
+
+ amdhsa::kernel_descriptor_t::bits_set(
+ KD.compute_pgm_rsrc1,
+ MCConstantExpr::create(amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE, Ctx),
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Ctx);
+ if (Version.Major < 12) {
+ amdhsa::kernel_descriptor_t::bits_set(
+ KD.compute_pgm_rsrc1, OneMCExpr,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Ctx);
+ amdhsa::kernel_descriptor_t::bits_set(
+ KD.compute_pgm_rsrc1, OneMCExpr,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Ctx);
}
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
+ amdhsa::kernel_descriptor_t::bits_set(
+ KD.compute_pgm_rsrc2, OneMCExpr,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Ctx);
if (Version.Major >= 10) {
- AMDHSA_BITS_SET(KD.kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
- STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
- STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, 1);
- }
- if (AMDGPU::isGFX90A(*STI)) {
- AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,
- amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
- STI->getFeatureBits().test(FeatureTgSplit) ? 1 : 0);
+ if (STI->getFeatureBits().test(FeatureWavefrontSize32))
+ amdhsa::kernel_descriptor_t::bits_set(
+ KD.kernel_code_properties, OneMCExpr,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, Ctx);
+ if (!STI->getFeatureBits().test(FeatureCuMode))
+ amdhsa::kernel_descriptor_t::bits_set(
+ KD.compute_pgm_rsrc1, OneMCExpr,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Ctx);
+
+ amdhsa::kernel_descriptor_t::bits_set(
+ KD.compute_pgm_rsrc1, OneMCExpr,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Ctx);
}
+ if (AMDGPU::isGFX90A(*STI) && STI->getFeatureBits().test(FeatureTgSplit))
+ amdhsa::kernel_descriptor_t::bits_set(
+ KD.compute_pgm_rsrc3, OneMCExpr,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Ctx);
return KD;
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index f24b9f0e3615d..0567a2001fbb4 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -26,6 +26,7 @@ struct Align;
class Argument;
class Function;
class GlobalValue;
+class MCContext;
class MCInstrInfo;
class MCRegisterClass;
class MCRegisterInfo;
@@ -798,8 +799,8 @@ unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
const MCSubtargetInfo *STI);
-amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
- const MCSubtargetInfo *STI);
+amdhsa::kernel_descriptor_t
+getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx);
bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
diff --git a/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s b/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s
index 8b90e20bb87d1..7b591904e877f 100644
--- a/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s
+++ b/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s
@@ -29,7 +29,7 @@
// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0030 00000c60 80000000 00000000 00000000
+// OBJDUMP-NEXT: 0030 00000c60 80000000 00040000 00000000
// complete
// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
@@ -39,12 +39,12 @@
// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 00b0 00000060 80000000 00000000 00000000
+// OBJDUMP-NEXT: 00b0 00000060 80000000 00040000 00000000
// disabled_user_sgpr
// OBJDUMP-NEXT: 00c0 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 00d0 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 00e0 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 00f0 00000c60 80000000 00000000 00000000
+// OBJDUMP-NEXT: 00f0 00000c60 80000000 00040000 00000000
.text
// ASM: .text
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs.s
new file mode 100644
index 0000000000000..a2764d40655bc
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs.s
@@ -0,0 +1,68 @@
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck --check-prefix=ASM %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=obj < %s > %t
+// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
+
+// When going from asm -> asm, the expressions should remain the same (i.e., symbolic).
+// When going from asm -> obj, the expressions should get resolved (through fixups).
+
+// OBJDUMP: Contents of section .rodata
+// expr_defined_later
+// OBJDUMP-NEXT: 0000 2b000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000
+// expr_defined
+// OBJDUMP-NEXT: 0040 2d000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0070 0000ac00 80000000 00000000 00000000
+
+.text
+// ASM: .text
+
+.amdhsa_code_object_version 4
+// ASM: .amdhsa_code_object_version 4
+
+.p2align 8
+.type expr_defined_later, at function
+expr_defined_later:
+ s_endpgm
+
+.p2align 8
+.type expr_defined, at function
+expr_defined:
+ s_endpgm
+
+.rodata
+// ASM: .rodata
+
+.p2align 6
+.amdhsa_kernel expr_defined_later
+ .amdhsa_group_segment_fixed_size defined_value+2
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+.end_amdhsa_kernel
+
+.set defined_value, 41
+
+.p2align 6
+.amdhsa_kernel expr_defined
+ .amdhsa_group_segment_fixed_size defined_value+4
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+.end_amdhsa_kernel
+
+
+
+// ASM: .amdhsa_kernel expr_defined_later
+// ASM: .amdhsa_group_segment_fixed_size defined_value+2
+// ASM: .end_amdhsa_kernel
+
+// ASM: .set defined_value, 41
+// ASM-NEXT: .no_dead_strip defined_value
+
+// ASM: .amdhsa_kernel expr_defined
+// ASM: .amdhsa_group_segment_fixed_size 45
+// ASM: .end_amdhsa_kernel
diff --git a/llvm/test/MC/AMDGPU/hsa-tg-split.s b/llvm/test/MC/AMDGPU/hsa-tg-split.s
new file mode 100644
index 0000000000000..5a4d3e2c279c5
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/hsa-tg-split.s
@@ -0,0 +1,74 @@
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack,+tgsplit < %s | FileCheck --check-prefix=ASM %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack,+tgsplit -filetype=obj < %s > %t
+// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
+
+// OBJDUMP: Contents of section .rodata
+// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000100
+// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000
+
+.text
+// ASM: .text
+
+.amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+"
+// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+"
+
+.amdhsa_code_object_version 4
+// ASM: .amdhsa_code_object_version 4
+
+.p2align 8
+.type minimal, at function
+minimal:
+ s_endpgm
+
+.rodata
+// ASM: .rodata
+
+.p2align 6
+.amdhsa_kernel minimal
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel minimal
+// ASM-NEXT: .amdhsa_group_segment_fixed_size 0
+// ASM-NEXT: .amdhsa_private_segment_fixed_size 0
+// ASM-NEXT: .amdhsa_kernarg_size 0
+// ASM-NEXT: .amdhsa_user_sgpr_count 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 0
+// ASM-NEXT: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM-NEXT: .amdhsa_accum_offset 4
+// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
+// ASM-NEXT: .amdhsa_float_round_mode_32 0
+// ASM-NEXT: .amdhsa_float_round_mode_16_64 0
+// ASM-NEXT: .amdhsa_float_denorm_mode_32 0
+// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3
+// ASM-NEXT: .amdhsa_dx10_clamp 1
+// ASM-NEXT: .amdhsa_ieee_mode 1
+// ASM-NEXT: .amdhsa_fp16_overflow 0
+// ASM-NEXT: .amdhsa_tg_split 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+// ASM-NEXT: .amdhsa_exception_fp_denorm_src 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+// ASM-NEXT: .amdhsa_exception_int_div_zero 0
+// ASM-NEXT: .end_amdhsa_kernel
>From 6ce92b28af2e3037d76adaad8b2fed3000c03c2f Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Thu, 8 Feb 2024 12:26:44 +0000
Subject: [PATCH 2/3] Separate MCExpr kernel descriptor from
kernel_descriptor_t, add more hsa symbolic expression tests, apply feedback
---
.../llvm/Support/AMDHSAKernelDescriptor.h | 79 ++---
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 10 +-
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h | 11 +-
.../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 55 ++--
...iptor.cpp => AMDGPUMCKernelDescriptor.cpp} | 19 +-
.../MCTargetDesc/AMDGPUMCKernelDescriptor.h | 68 +++++
.../MCTargetDesc/AMDGPUTargetStreamer.cpp | 45 ++-
.../MCTargetDesc/AMDGPUTargetStreamer.h | 33 +-
.../Target/AMDGPU/MCTargetDesc/CMakeLists.txt | 2 +-
.../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 23 +-
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 9 +-
llvm/test/MC/AMDGPU/hsa-sym-expr-failure.s | 281 ++++++++++++++++++
llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s | 184 ++++++++++++
llvm/test/MC/AMDGPU/hsa-sym-exprs.s | 114 +++++--
14 files changed, 780 insertions(+), 153 deletions(-)
rename llvm/lib/Target/AMDGPU/MCTargetDesc/{AMDHSAKernelDescriptor.cpp => AMDGPUMCKernelDescriptor.cpp} (64%)
create mode 100644 llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.h
create mode 100644 llvm/test/MC/AMDGPU/hsa-sym-expr-failure.s
create mode 100644 llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s
diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
index 9c5d8fa1c1a60..84cac3ef700e0 100644
--- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
+++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
@@ -52,10 +52,6 @@
#endif // AMDHSA_BITS_SET
namespace llvm {
-
-class MCContext;
-class MCExpr;
-
namespace amdhsa {
// Floating point rounding modes. Must match hardware definition.
@@ -242,40 +238,18 @@ enum : int32_t {
// Kernel descriptor. Must be kept backwards compatible.
struct kernel_descriptor_t {
- const MCExpr *group_segment_fixed_size;
- const MCExpr *private_segment_fixed_size;
- const MCExpr *kernarg_size;
+ uint32_t group_segment_fixed_size;
+ uint32_t private_segment_fixed_size;
+ uint32_t kernarg_size;
uint8_t reserved0[4];
int64_t kernel_code_entry_byte_offset;
uint8_t reserved1[20];
- const MCExpr *compute_pgm_rsrc3; // GFX10+ and GFX90A+
- const MCExpr *compute_pgm_rsrc1;
- const MCExpr *compute_pgm_rsrc2;
- const MCExpr *kernel_code_properties;
- const MCExpr *kernarg_preload;
+ uint32_t compute_pgm_rsrc3; // GFX10+ and GFX90A+
+ uint32_t compute_pgm_rsrc1;
+ uint32_t compute_pgm_rsrc2;
+ uint16_t kernel_code_properties;
+ uint16_t kernarg_preload;
uint8_t reserved3[4];
-
- static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift,
- uint32_t Mask, MCContext &Ctx);
- static const MCExpr *bits_get(const MCExpr *Src, uint32_t Shift,
- uint32_t Mask, MCContext &Ctx);
-};
-
-// Sizes for kernel_descriptor_t properties, should add up to 64.
-enum : uint32_t {
- SIZEOF_GROUP_SEGMENT_FIXED_SIZE = 4,
- SIZEOF_PRIVATE_SEGMENT_FIXED_SIZE = 4,
- SIZEOF_KERNARG_SIZE = 4,
- SIZEOF_RESERVED0 = 4,
- SIZEOF_KERNEL_CODE_ENTRY_BYTE_OFFSET = 8,
- SIZEOF_RESERVED1 = 20,
- SIZEOF_COMPUTE_PGM_RSRC3 = 4,
- SIZEOF_COMPUTE_PGM_RSRC1 = 4,
- SIZEOF_COMPUTE_PGM_RSRC2 = 4,
- SIZEOF_KERNEL_CODE_PROPERTIES = 2,
- SIZEOF_KERNARG_PRELOAD = 2,
- SIZEOF_RESERVED3 = 4,
- SIZEOF_KERNEL_DESCRIPTOR = 64
};
enum : uint32_t {
@@ -293,6 +267,43 @@ enum : uint32_t {
RESERVED3_OFFSET = 60
};
+static_assert(
+ sizeof(kernel_descriptor_t) == 64,
+ "invalid size for kernel_descriptor_t");
+static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) ==
+ GROUP_SEGMENT_FIXED_SIZE_OFFSET,
+ "invalid offset for group_segment_fixed_size");
+static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) ==
+ PRIVATE_SEGMENT_FIXED_SIZE_OFFSET,
+ "invalid offset for private_segment_fixed_size");
+static_assert(offsetof(kernel_descriptor_t, kernarg_size) ==
+ KERNARG_SIZE_OFFSET,
+ "invalid offset for kernarg_size");
+static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET,
+ "invalid offset for reserved0");
+static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) ==
+ KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET,
+ "invalid offset for kernel_code_entry_byte_offset");
+static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET,
+ "invalid offset for reserved1");
+static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) ==
+ COMPUTE_PGM_RSRC3_OFFSET,
+ "invalid offset for compute_pgm_rsrc3");
+static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) ==
+ COMPUTE_PGM_RSRC1_OFFSET,
+ "invalid offset for compute_pgm_rsrc1");
+static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) ==
+ COMPUTE_PGM_RSRC2_OFFSET,
+ "invalid offset for compute_pgm_rsrc2");
+static_assert(offsetof(kernel_descriptor_t, kernel_code_properties) ==
+ KERNEL_CODE_PROPERTIES_OFFSET,
+ "invalid offset for kernel_code_properties");
+static_assert(offsetof(kernel_descriptor_t, kernarg_preload) ==
+ KERNARG_PRELOAD_OFFSET,
+ "invalid offset for kernarg_preload");
+static_assert(offsetof(kernel_descriptor_t, reserved3) == RESERVED3_OFFSET,
+ "invalid offset for reserved3");
+
} // end namespace amdhsa
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index d68c7e499f62c..33f6b46dfd62f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -22,6 +22,7 @@
#include "AMDKernelCodeT.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
+#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "R600AsmPrinter.h"
#include "SIMachineFunctionInfo.h"
@@ -420,15 +421,14 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
return KernelCodeProperties;
}
-amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
- const MachineFunction &MF,
- const SIProgramInfo &PI) const {
+MCKernelDescriptor
+AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(const MachineFunction &MF,
+ const SIProgramInfo &PI) const {
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const Function &F = MF.getFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- amdhsa::kernel_descriptor_t KernelDescriptor;
- memset(&KernelDescriptor, 0x0, sizeof(KernelDescriptor));
+ MCKernelDescriptor KernelDescriptor;
assert(isUInt<32>(PI.ScratchSize));
assert(isUInt<32>(PI.getComputePGMRSrc1(STM)));
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 79326cd3d3289..b8b2718d293e6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -28,15 +28,12 @@ class MCCodeEmitter;
class MCOperand;
namespace AMDGPU {
+struct MCKernelDescriptor;
namespace HSAMD {
class MetadataStreamer;
}
} // namespace AMDGPU
-namespace amdhsa {
-struct kernel_descriptor_t;
-}
-
class AMDGPUAsmPrinter final : public AsmPrinter {
private:
unsigned CodeObjectVersion;
@@ -75,9 +72,9 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
uint16_t getAmdhsaKernelCodeProperties(
const MachineFunction &MF) const;
- amdhsa::kernel_descriptor_t getAmdhsaKernelDescriptor(
- const MachineFunction &MF,
- const SIProgramInfo &PI) const;
+ AMDGPU::MCKernelDescriptor
+ getAmdhsaKernelDescriptor(const MachineFunction &MF,
+ const SIProgramInfo &PI) const;
void initTargetStreamer(Module &M);
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 2331af628fb73..069ac30939ae7 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "AMDKernelCodeT.h"
+#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
@@ -5236,7 +5237,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (getParser().parseIdentifier(KernelName))
return true;
- kernel_descriptor_t KD =
+ AMDGPU::MCKernelDescriptor KD =
getDefaultAmdhsaKernelDescriptor(&getSTI(), getContext());
StringSet<> Seen;
@@ -5295,11 +5296,13 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
if (!isUInt<ENTRY##_WIDTH>(Val)) \
return OutOfRangeError(RANGE); \
- kernel_descriptor_t::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
- getContext());
+ AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
+ getContext());
-#define EXPR_SHOULD_RESOLVE() \
- if (!EvaluatableExpr) \
+// Some fields use the parsed value immediately, which requires the expression
+// to be resolvable.
+#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
+ if (!(RESOLVED)) \
return Error(IDRange.Start, "directive should have resolvable expression", \
IDRange);
@@ -5316,10 +5319,10 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
return OutOfRangeError(ValRange);
KD.kernarg_size = ExprVal;
} else if (ID == ".amdhsa_user_sgpr_count") {
- EXPR_SHOULD_RESOLVE();
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
ExplicitUserSGPRCount = Val;
} else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
- EXPR_SHOULD_RESOLVE();
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
if (hasArchitectedFlatScratch())
return Error(IDRange.Start,
"directive is not supported with architected flat scratch",
@@ -5330,7 +5333,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (Val)
ImpliedUserSGPRCount += 4;
} else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
- EXPR_SHOULD_RESOLVE();
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
if (!hasKernargPreload())
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
@@ -5343,7 +5346,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
PreloadLength = Val;
}
} else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
- EXPR_SHOULD_RESOLVE();
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
if (!hasKernargPreload())
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
@@ -5354,28 +5357,28 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (Val)
PreloadOffset = Val;
} else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
- EXPR_SHOULD_RESOLVE();
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
- EXPR_SHOULD_RESOLVE();
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
- EXPR_SHOULD_RESOLVE();
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
ExprVal, ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
- EXPR_SHOULD_RESOLVE();
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
ValRange);
@@ -5386,21 +5389,21 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
return Error(IDRange.Start,
"directive is not supported with architected flat scratch",
IDRange);
- EXPR_SHOULD_RESOLVE();
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
ExprVal, ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
- EXPR_SHOULD_RESOLVE();
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
ExprVal, ValRange);
if (Val)
ImpliedUserSGPRCount += 1;
} else if (ID == ".amdhsa_wavefront_size32") {
- EXPR_SHOULD_RESOLVE();
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
EnableWavefrontSize32 = Val;
@@ -5449,25 +5452,25 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
ValRange);
} else if (ID == ".amdhsa_next_free_vgpr") {
- EXPR_SHOULD_RESOLVE();
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
VGPRRange = ValRange;
NextFreeVGPR = Val;
} else if (ID == ".amdhsa_next_free_sgpr") {
- EXPR_SHOULD_RESOLVE();
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
SGPRRange = ValRange;
NextFreeSGPR = Val;
} else if (ID == ".amdhsa_accum_offset") {
if (!isGFX90A())
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
- EXPR_SHOULD_RESOLVE();
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
AccumOffset = Val;
} else if (ID == ".amdhsa_reserve_vcc") {
- EXPR_SHOULD_RESOLVE();
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
if (!isUInt<1>(Val))
return OutOfRangeError(ValRange);
ReserveVCC = Val;
} else if (ID == ".amdhsa_reserve_flat_scratch") {
- EXPR_SHOULD_RESOLVE();
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
if (IVersion.Major < 7)
return Error(IDRange.Start, "directive requires gfx7+", IDRange);
if (hasArchitectedFlatScratch())
@@ -5543,7 +5546,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
ValRange);
} else if (ID == ".amdhsa_shared_vgpr_count") {
- EXPR_SHOULD_RESOLVE();
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
if (IVersion.Major < 10 || IVersion.Major >= 12)
return Error(IDRange.Start, "directive requires gfx10 or gfx11",
IDRange);
@@ -5612,7 +5615,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
VGPRBlocks))
return OutOfRangeError(VGPRRange);
- kernel_descriptor_t::bits_set(
+ AMDGPU::MCKernelDescriptor::bits_set(
KD.compute_pgm_rsrc1, MCConstantExpr::create(VGPRBlocks, getContext()),
COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
@@ -5620,7 +5623,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
SGPRBlocks))
return OutOfRangeError(SGPRRange);
- kernel_descriptor_t::bits_set(
+ AMDGPU::MCKernelDescriptor::bits_set(
KD.compute_pgm_rsrc1, MCConstantExpr::create(SGPRBlocks, getContext()),
COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
@@ -5634,7 +5637,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
return TokError("too many user SGPRs enabled");
- kernel_descriptor_t::bits_set(
+ AMDGPU::MCKernelDescriptor::bits_set(
KD.compute_pgm_rsrc2, MCConstantExpr::create(UserSGPRCount, getContext()),
COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
@@ -5656,7 +5659,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
"increments of 4");
if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
return TokError("accum_offset exceeds total VGPR allocation");
- kernel_descriptor_t::bits_set(
+ MCKernelDescriptor::bits_set(
KD.compute_pgm_rsrc3,
MCConstantExpr::create(AccumOffset / 4 - 1, getContext()),
COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
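The parsing pattern the directives above rely on, as a rough sketch (the names ExprVal, Val and EvaluatableExpr follow the patch; assume this runs inside AMDGPUAsmParser with the usual MCAsmParser available): parse the operand as an MCExpr, remember whether it already folds to a constant, and only demand a constant for directives that consume the value immediately.

  // Parse the directive operand as a (possibly symbolic) MCExpr.
  const MCExpr *ExprVal = nullptr;
  if (getParser().parseExpression(ExprVal))
    return true;
  // Try to fold it now; EvaluatableExpr is what EXPR_RESOLVE_OR_ERROR checks.
  int64_t Val = 0;
  bool EvaluatableExpr = ExprVal->evaluateAsAbsolute(Val);
  // Fields stored as MCExpr take ExprVal; fields needed right away take Val.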
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDHSAKernelDescriptor.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.cpp
similarity index 64%
rename from llvm/lib/Target/AMDGPU/MCTargetDesc/AMDHSAKernelDescriptor.cpp
rename to llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.cpp
index 905723b7325bc..252bcc20d8840 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDHSAKernelDescriptor.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.cpp
@@ -6,16 +6,19 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Support/AMDHSAKernelDescriptor.h"
+#include "AMDGPUMCKernelDescriptor.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
using namespace llvm;
-using namespace llvm::amdhsa;
+using namespace llvm::AMDGPU;
-void kernel_descriptor_t::bits_set(const MCExpr *&Dst, const MCExpr *Value,
- uint32_t Shift, uint32_t Mask,
- MCContext &Ctx) {
+// MCExpr for:
+// Dst = Dst & ~Mask
+// Dst = Dst | (Value << Shift)
+void MCKernelDescriptor::bits_set(const MCExpr *&Dst, const MCExpr *Value,
+ uint32_t Shift, uint32_t Mask,
+ MCContext &Ctx) {
auto Sft = MCConstantExpr::create(Shift, Ctx);
auto Msk = MCConstantExpr::create(Mask, Ctx);
Dst = MCBinaryExpr::createAnd(Dst, MCUnaryExpr::createNot(Msk, Ctx), Ctx);
@@ -23,8 +26,10 @@ void kernel_descriptor_t::bits_set(const MCExpr *&Dst, const MCExpr *Value,
Ctx);
}
-const MCExpr *kernel_descriptor_t::bits_get(const MCExpr *Src, uint32_t Shift,
- uint32_t Mask, MCContext &Ctx) {
+// MCExpr for:
+// return (Src & Mask) >> Shift
+const MCExpr *MCKernelDescriptor::bits_get(const MCExpr *Src, uint32_t Shift,
+ uint32_t Mask, MCContext &Ctx) {
auto Sft = MCConstantExpr::create(Shift, Ctx);
auto Msk = MCConstantExpr::create(Mask, Ctx);
return MCBinaryExpr::createLShr(MCBinaryExpr::createAnd(Src, Msk, Ctx), Sft,
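A minimal usage sketch of the two helpers, assuming an MCContext &Ctx is in scope: seed a field with a constant zero expression, fold a bit-field value in with bits_set, and read it back as an MCExpr with bits_get.

  using namespace llvm;
  using namespace llvm::AMDGPU;

  MCKernelDescriptor KD;
  KD.compute_pgm_rsrc1 = MCConstantExpr::create(0, Ctx);
  // Set ENABLE_IEEE_MODE by clearing the field and or-ing in the shifted value.
  MCKernelDescriptor::bits_set(
      KD.compute_pgm_rsrc1, MCConstantExpr::create(1, Ctx),
      amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Ctx);
  // Recover the field as an expression: (rsrc1 & mask) >> shift.
  const MCExpr *IeeeMode = MCKernelDescriptor::bits_get(
      KD.compute_pgm_rsrc1,
      amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Ctx);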
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.h
new file mode 100644
index 0000000000000..1b4dc226886b1
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.h
@@ -0,0 +1,68 @@
+//===--- AMDGPUMCKernelDescriptor.h ---------------------------*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// AMDHSA kernel descriptor MCExpr struct for use in the MC layer. Uses
+/// AMDHSAKernelDescriptor.h for sizes and constants.
+///
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCKERNELDESCRIPTOR_H
+#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCKERNELDESCRIPTOR_H
+
+#include "llvm/Support/AMDHSAKernelDescriptor.h"
+
+namespace llvm {
+class MCExpr;
+class MCContext;
+namespace AMDGPU {
+
+struct MCKernelDescriptor {
+ const MCExpr *group_segment_fixed_size;
+ const MCExpr *private_segment_fixed_size;
+ const MCExpr *kernarg_size;
+ const MCExpr *compute_pgm_rsrc3;
+ const MCExpr *compute_pgm_rsrc1;
+ const MCExpr *compute_pgm_rsrc2;
+ const MCExpr *kernel_code_properties;
+ const MCExpr *kernarg_preload;
+
+ static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift,
+ uint32_t Mask, MCContext &Ctx);
+ static const MCExpr *bits_get(const MCExpr *Src, uint32_t Shift,
+ uint32_t Mask, MCContext &Ctx);
+};
+
+enum : uint32_t {
+ SIZEOF_GROUP_SEGMENT_FIXED_SIZE =
+ sizeof(amdhsa::kernel_descriptor_t::group_segment_fixed_size),
+ SIZEOF_PRIVATE_SEGMENT_FIXED_SIZE =
+ sizeof(amdhsa::kernel_descriptor_t::private_segment_fixed_size),
+ SIZEOF_KERNARG_SIZE = sizeof(amdhsa::kernel_descriptor_t::kernarg_size),
+ SIZEOF_RESERVED0 = sizeof(amdhsa::kernel_descriptor_t::reserved0),
+ SIZEOF_KERNEL_CODE_ENTRY_BYTE_OFFSET =
+ sizeof(amdhsa::kernel_descriptor_t::kernel_code_entry_byte_offset),
+ SIZEOF_RESERVED1 = sizeof(amdhsa::kernel_descriptor_t::reserved1),
+ SIZEOF_COMPUTE_PGM_RSRC3 =
+ sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc3),
+ SIZEOF_COMPUTE_PGM_RSRC1 =
+ sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc1),
+ SIZEOF_COMPUTE_PGM_RSRC2 =
+ sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc2),
+ SIZEOF_KERNEL_CODE_PROPERTIES =
+ sizeof(amdhsa::kernel_descriptor_t::kernel_code_properties),
+ SIZEOF_KERNARG_PRELOAD = sizeof(amdhsa::kernel_descriptor_t::kernarg_preload),
+ SIZEOF_RESERVED3 = sizeof(amdhsa::kernel_descriptor_t::reserved3),
+ SIZEOF_KERNEL_DESCRIPTOR = sizeof(amdhsa::kernel_descriptor_t)
+};
+
+} // end namespace AMDGPU
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCKERNELDESCRIPTOR_H
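The SIZEOF_* entries mirror the byte layout of kernel_descriptor_t, which makes the earlier "should add up to 64" note checkable; a sketch of such a consistency check, placed inside namespace llvm::AMDGPU, would be:

  static_assert(SIZEOF_GROUP_SEGMENT_FIXED_SIZE + SIZEOF_PRIVATE_SEGMENT_FIXED_SIZE +
                        SIZEOF_KERNARG_SIZE + SIZEOF_RESERVED0 +
                        SIZEOF_KERNEL_CODE_ENTRY_BYTE_OFFSET + SIZEOF_RESERVED1 +
                        SIZEOF_COMPUTE_PGM_RSRC3 + SIZEOF_COMPUTE_PGM_RSRC1 +
                        SIZEOF_COMPUTE_PGM_RSRC2 + SIZEOF_KERNEL_CODE_PROPERTIES +
                        SIZEOF_KERNARG_PRELOAD + SIZEOF_RESERVED3 ==
                    SIZEOF_KERNEL_DESCRIPTOR,
                "kernel descriptor field sizes must cover the full 64 bytes");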
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index b2d0657e49f07..de010c0e412fb 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUTargetStreamer.h"
+#include "AMDGPUMCKernelDescriptor.h"
#include "AMDGPUPTNote.h"
#include "AMDKernelCodeT.h"
#include "Utils/AMDGPUBaseInfo.h"
@@ -299,7 +300,7 @@ bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
- const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
+ const MCKernelDescriptor &KD, uint64_t NextVGPR, uint64_t NextSGPR,
bool ReserveVCC, bool ReserveFlatScr) {
IsaVersion IVersion = getIsaVersion(STI.getCPU());
const MCAsmInfo *MAI = getContext().getAsmInfo();
@@ -309,9 +310,9 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
auto print_field = [&](const MCExpr *Expr, uint32_t Shift, uint32_t Mask,
StringRef Directive) {
int64_t IVal;
- OS << "\t\t" << Directive << " ";
+ OS << "\t\t" << Directive << ' ';
const MCExpr *pgm_rsrc1_bits =
- amdhsa::kernel_descriptor_t::bits_get(Expr, Shift, Mask, getContext());
+ MCKernelDescriptor::bits_get(Expr, Shift, Mask, getContext());
if (pgm_rsrc1_bits->evaluateAsAbsolute(IVal)) {
OS << static_cast<uint64_t>(IVal);
} else {
@@ -421,7 +422,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
if (AMDGPU::isGFX90A(STI)) {
// MCExpr equivalent of taking the (accum_offset + 1) * 4.
- const MCExpr *accum_bits = amdhsa::kernel_descriptor_t::bits_get(
+ const MCExpr *accum_bits = MCKernelDescriptor::bits_get(
KD.compute_pgm_rsrc3,
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
@@ -883,7 +884,7 @@ bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
- const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
+ const MCKernelDescriptor &KernelDescriptor, uint64_t NextVGPR,
uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) {
auto &Streamer = getStreamer();
auto &Context = Streamer.getContext();
@@ -901,7 +902,7 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
// Kernel descriptor symbol's type and size are fixed.
KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
KernelDescriptorSymbol->setSize(
- MCConstantExpr::create(amdhsa::SIZEOF_KERNEL_DESCRIPTOR, Context));
+ MCConstantExpr::create(SIZEOF_KERNEL_DESCRIPTOR, Context));
// The visibility of the kernel code symbol must be protected or less to allow
// static relocations from the kernel descriptor to be used.
@@ -910,14 +911,13 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
Streamer.emitLabel(KernelDescriptorSymbol);
Streamer.emitValue(KernelDescriptor.group_segment_fixed_size,
- amdhsa::SIZEOF_GROUP_SEGMENT_FIXED_SIZE);
+ SIZEOF_GROUP_SEGMENT_FIXED_SIZE);
Streamer.emitValue(KernelDescriptor.private_segment_fixed_size,
- amdhsa::SIZEOF_PRIVATE_SEGMENT_FIXED_SIZE);
- Streamer.emitValue(KernelDescriptor.kernarg_size,
- amdhsa::SIZEOF_KERNARG_SIZE);
+ SIZEOF_PRIVATE_SEGMENT_FIXED_SIZE);
+ Streamer.emitValue(KernelDescriptor.kernarg_size, SIZEOF_KERNARG_SIZE);
- for (uint8_t Res : KernelDescriptor.reserved0)
- Streamer.emitInt8(Res);
+ for (uint32_t i = 0; i < SIZEOF_RESERVED0; ++i)
+ Streamer.emitInt8(0u);
// FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
// expression being created is:
@@ -930,19 +930,18 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
MCSymbolRefExpr::create(KernelDescriptorSymbol,
MCSymbolRefExpr::VK_None, Context),
Context),
- amdhsa::SIZEOF_KERNEL_CODE_ENTRY_BYTE_OFFSET);
- for (uint8_t Res : KernelDescriptor.reserved1)
- Streamer.emitInt8(Res);
+ SIZEOF_KERNEL_CODE_ENTRY_BYTE_OFFSET);
+ for (uint32_t i = 0; i < SIZEOF_RESERVED1; ++i)
+ Streamer.emitInt8(0u);
Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc3,
- amdhsa::SIZEOF_COMPUTE_PGM_RSRC3);
+ SIZEOF_COMPUTE_PGM_RSRC3);
Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc1,
- amdhsa::SIZEOF_COMPUTE_PGM_RSRC1);
+ SIZEOF_COMPUTE_PGM_RSRC1);
Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc2,
- amdhsa::SIZEOF_COMPUTE_PGM_RSRC2);
+ SIZEOF_COMPUTE_PGM_RSRC2);
Streamer.emitValue(KernelDescriptor.kernel_code_properties,
- amdhsa::SIZEOF_KERNEL_CODE_PROPERTIES);
- Streamer.emitValue(KernelDescriptor.kernarg_preload,
- amdhsa::SIZEOF_KERNARG_PRELOAD);
- for (uint8_t Res : KernelDescriptor.reserved3)
- Streamer.emitInt8(Res);
+ SIZEOF_KERNEL_CODE_PROPERTIES);
+ Streamer.emitValue(KernelDescriptor.kernarg_preload, SIZEOF_KERNARG_PRELOAD);
+ for (uint32_t i = 0; i < SIZEOF_RESERVED3; ++i)
+ Streamer.emitInt8(0u);
}
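Each emitValue call above takes an MCExpr rather than an immediate, so a field that still references a not-yet-defined symbol simply becomes a data fixup that the assembler resolves at layout time. A minimal sketch of what the streamer sees for a directive such as .amdhsa_group_segment_fixed_size defined_value+2, assuming Streamer and the SIZEOF_* constants from AMDGPUMCKernelDescriptor.h are in scope:

  MCContext &Ctx = Streamer.getContext();
  // 'defined_value' may be defined later in the file; the expression stays symbolic.
  const MCExpr *GroupSize = MCBinaryExpr::createAdd(
      MCSymbolRefExpr::create(Ctx.getOrCreateSymbol("defined_value"), Ctx),
      MCConstantExpr::create(2, Ctx), Ctx);
  // Emits a 4-byte value; if it cannot be folded yet, a fixup is recorded and
  // filled in once the symbol gets a value.
  Streamer.emitValue(GroupSize, SIZEOF_GROUP_SEGMENT_FIXED_SIZE);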
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index ad5f27a33fcbd..b715468f328d5 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -22,15 +22,13 @@ class MCSymbol;
class formatted_raw_ostream;
namespace AMDGPU {
+
+struct MCKernelDescriptor;
namespace HSAMD {
struct Metadata;
}
} // namespace AMDGPU
-namespace amdhsa {
-struct kernel_descriptor_t;
-}
-
class AMDGPUTargetStreamer : public MCTargetStreamer {
AMDGPUPALMetadata PALMetadata;
@@ -93,10 +91,11 @@ class AMDGPUTargetStreamer : public MCTargetStreamer {
return true;
}
- virtual void EmitAmdhsaKernelDescriptor(
- const MCSubtargetInfo &STI, StringRef KernelName,
- const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
- uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) {}
+ virtual void
+ EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName,
+ const AMDGPU::MCKernelDescriptor &KernelDescriptor,
+ uint64_t NextVGPR, uint64_t NextSGPR,
+ bool ReserveVCC, bool ReserveFlatScr) {}
static StringRef getArchNameFromElfMach(unsigned ElfMach);
static unsigned getElfMach(StringRef GPU);
@@ -148,10 +147,11 @@ class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer {
/// \returns True on success, false on failure.
bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI) override;
- void EmitAmdhsaKernelDescriptor(
- const MCSubtargetInfo &STI, StringRef KernelName,
- const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
- uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) override;
+ void
+ EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName,
+ const AMDGPU::MCKernelDescriptor &KernelDescriptor,
+ uint64_t NextVGPR, uint64_t NextSGPR,
+ bool ReserveVCC, bool ReserveFlatScr) override;
};
class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
@@ -202,10 +202,11 @@ class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
/// \returns True on success, false on failure.
bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI) override;
- void EmitAmdhsaKernelDescriptor(
- const MCSubtargetInfo &STI, StringRef KernelName,
- const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
- uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) override;
+ void
+ EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName,
+ const AMDGPU::MCKernelDescriptor &KernelDescriptor,
+ uint64_t NextVGPR, uint64_t NextSGPR,
+ bool ReserveVCC, bool ReserveFlatScr) override;
};
}
#endif
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
index 51df7bb3ebe84..72f7603f087bf 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
@@ -7,7 +7,7 @@ add_llvm_component_library(LLVMAMDGPUDesc
AMDGPUMCCodeEmitter.cpp
AMDGPUMCTargetDesc.cpp
AMDGPUTargetStreamer.cpp
- AMDHSAKernelDescriptor.cpp
+ AMDGPUMCKernelDescriptor.cpp
R600InstPrinter.cpp
R600MCCodeEmitter.cpp
R600MCTargetDesc.cpp
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 4b37fc7569aa1..646cc2897bf17 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -10,6 +10,7 @@
#include "AMDGPU.h"
#include "AMDGPUAsmUtils.h"
#include "AMDKernelCodeT.h"
+#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
@@ -1187,11 +1188,11 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
}
}
-amdhsa::kernel_descriptor_t
-getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx) {
+MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI,
+ MCContext &Ctx) {
IsaVersion Version = getIsaVersion(STI->getCPU());
- amdhsa::kernel_descriptor_t KD;
+ MCKernelDescriptor KD;
memset(&KD, 0, sizeof(KD));
const MCExpr *ZeroMCExpr = MCConstantExpr::create(0, Ctx);
const MCExpr *OneMCExpr = MCConstantExpr::create(1, Ctx);
@@ -1205,44 +1206,44 @@ getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx) {
KD.kernel_code_properties = ZeroMCExpr;
KD.kernarg_preload = ZeroMCExpr;
- amdhsa::kernel_descriptor_t::bits_set(
+ MCKernelDescriptor::bits_set(
KD.compute_pgm_rsrc1,
MCConstantExpr::create(amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE, Ctx),
amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Ctx);
if (Version.Major < 12) {
- amdhsa::kernel_descriptor_t::bits_set(
+ MCKernelDescriptor::bits_set(
KD.compute_pgm_rsrc1, OneMCExpr,
amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Ctx);
- amdhsa::kernel_descriptor_t::bits_set(
+ MCKernelDescriptor::bits_set(
KD.compute_pgm_rsrc1, OneMCExpr,
amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Ctx);
}
- amdhsa::kernel_descriptor_t::bits_set(
+ MCKernelDescriptor::bits_set(
KD.compute_pgm_rsrc2, OneMCExpr,
amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,
amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Ctx);
if (Version.Major >= 10) {
if (STI->getFeatureBits().test(FeatureWavefrontSize32))
- amdhsa::kernel_descriptor_t::bits_set(
+ MCKernelDescriptor::bits_set(
KD.kernel_code_properties, OneMCExpr,
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, Ctx);
if (!STI->getFeatureBits().test(FeatureCuMode))
- amdhsa::kernel_descriptor_t::bits_set(
+ MCKernelDescriptor::bits_set(
KD.compute_pgm_rsrc1, OneMCExpr,
amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT,
amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Ctx);
- amdhsa::kernel_descriptor_t::bits_set(
+ MCKernelDescriptor::bits_set(
KD.compute_pgm_rsrc1, OneMCExpr,
amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,
amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Ctx);
}
if (AMDGPU::isGFX90A(*STI) && STI->getFeatureBits().test(FeatureTgSplit))
- amdhsa::kernel_descriptor_t::bits_set(
+ MCKernelDescriptor::bits_set(
KD.compute_pgm_rsrc3, OneMCExpr,
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Ctx);
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 0567a2001fbb4..9eeef696ae6df 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -35,12 +35,9 @@ class StringRef;
class Triple;
class raw_ostream;
-namespace amdhsa {
-struct kernel_descriptor_t;
-}
-
namespace AMDGPU {
+struct MCKernelDescriptor;
struct IsaVersion;
enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };
@@ -799,8 +796,8 @@ unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
const MCSubtargetInfo *STI);
-amdhsa::kernel_descriptor_t
-getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx);
+MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI,
+ MCContext &Ctx);
bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-expr-failure.s b/llvm/test/MC/AMDGPU/hsa-sym-expr-failure.s
new file mode 100644
index 0000000000000..fab3e893352b2
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/hsa-sym-expr-failure.s
@@ -0,0 +1,281 @@
+// RUN: not llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefix=ASM %s
+
+// Some directives currently require an immediately resolvable expression, i.e.,
+// one that does not depend on yet-unknown symbolic values.
+
+.text
+// ASM: .text
+
+.amdhsa_code_object_version 4
+// ASM: .amdhsa_code_object_version 4
+
+.p2align 8
+.type user_sgpr_count,@function
+user_sgpr_count:
+ s_endpgm
+
+.p2align 6
+.amdhsa_kernel user_sgpr_count
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_user_sgpr_count defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_user_sgpr_count
+
+.p2align 8
+.type user_sgpr_private_segment_buffer,@function
+user_sgpr_private_segment_buffer:
+ s_endpgm
+
+.amdhsa_kernel user_sgpr_private_segment_buffer
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_user_sgpr_private_segment_buffer defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer
+
+.p2align 8
+.type user_sgpr_kernarg_preload_length,@function
+user_sgpr_kernarg_preload_length:
+ s_endpgm
+
+.amdhsa_kernel user_sgpr_kernarg_preload_length
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_user_sgpr_kernarg_preload_length defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length defined_boolean
+
+.p2align 8
+.type user_sgpr_kernarg_preload_offset,@function
+user_sgpr_kernarg_preload_offset:
+ s_endpgm
+
+.amdhsa_kernel user_sgpr_kernarg_preload_offset
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_user_sgpr_kernarg_preload_offset defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset defined_boolean
+
+.p2align 8
+.type user_sgpr_dispatch_ptr,@function
+user_sgpr_dispatch_ptr:
+ s_endpgm
+
+.p2align 6
+.amdhsa_kernel user_sgpr_dispatch_ptr
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_user_sgpr_dispatch_ptr defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr
+
+.p2align 8
+.type user_sgpr_queue_ptr,@function
+user_sgpr_queue_ptr:
+ s_endpgm
+
+.p2align 6
+.amdhsa_kernel user_sgpr_queue_ptr
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_user_sgpr_queue_ptr defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr
+
+.p2align 8
+.type user_sgpr_kernarg_segment_ptr,@function
+user_sgpr_kernarg_segment_ptr:
+ s_endpgm
+
+.p2align 6
+.amdhsa_kernel user_sgpr_kernarg_segment_ptr
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_user_sgpr_kernarg_segment_ptr defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr
+
+.p2align 8
+.type user_sgpr_dispatch_id,@function
+user_sgpr_dispatch_id:
+ s_endpgm
+
+.p2align 6
+.amdhsa_kernel user_sgpr_dispatch_id
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_user_sgpr_dispatch_id defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id
+
+.p2align 8
+.type user_sgpr_flat_scratch_init,@function
+user_sgpr_flat_scratch_init:
+ s_endpgm
+
+.p2align 6
+.amdhsa_kernel user_sgpr_flat_scratch_init
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_user_sgpr_flat_scratch_init defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init
+
+.p2align 8
+.type user_sgpr_private_segment_size,@function
+user_sgpr_private_segment_size:
+ s_endpgm
+
+.p2align 6
+.amdhsa_kernel user_sgpr_private_segment_size
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_user_sgpr_private_segment_size defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size
+
+.p2align 8
+.type wavefront_size32,@function
+wavefront_size32:
+ s_endpgm
+
+.p2align 6
+.amdhsa_kernel wavefront_size32
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_wavefront_size32 defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_wavefront_size32
+
+.p2align 8
+.type next_free_vgpr,@function
+next_free_vgpr:
+ s_endpgm
+
+.p2align 6
+.amdhsa_kernel next_free_vgpr
+ .amdhsa_next_free_vgpr defined_boolean
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_next_free_vgpr
+
+.p2align 8
+.type next_free_sgpr,@function
+next_free_sgpr:
+ s_endpgm
+
+.p2align 6
+.amdhsa_kernel next_free_sgpr
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr defined_boolean
+ .amdhsa_accum_offset 4
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_next_free_sgpr
+
+.p2align 8
+.type accum_offset,@function
+accum_offset:
+ s_endpgm
+
+.p2align 6
+.amdhsa_kernel accum_offset
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_accum_offset
+
+.p2align 8
+.type reserve_vcc,@function
+reserve_vcc:
+ s_endpgm
+
+.p2align 6
+.amdhsa_kernel reserve_vcc
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_reserve_vcc defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_reserve_vcc
+
+.p2align 8
+.type reserve_flat_scratch,@function
+reserve_flat_scratch:
+ s_endpgm
+
+.p2align 6
+.amdhsa_kernel reserve_flat_scratch
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_reserve_flat_scratch defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_reserve_flat_scratch
+
+.p2align 8
+.type shared_vgpr_count,@function
+shared_vgpr_count:
+ s_endpgm
+
+.p2align 6
+.amdhsa_kernel shared_vgpr_count
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+ .amdhsa_shared_vgpr_count defined_boolean
+.end_amdhsa_kernel
+
+// ASM: error: directive should have resolvable expression
+// ASM-NEXT: .amdhsa_shared_vgpr_count
+
+.set defined_boolean, 1
+
+// ASM: .set defined_boolean, 1
+// ASM-NEXT: .no_dead_strip defined_boolean
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s
new file mode 100644
index 0000000000000..449616d35186b
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s
@@ -0,0 +1,184 @@
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefix=ASM %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1200 -filetype=obj < %s > %t
+// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
+
+// When going from asm -> asm, the expressions should remain the same (i.e., symbolic).
+// When going from asm -> obj, the expressions should get resolved (through fixups).
+
+// OBJDUMP: Contents of section .rodata
+// expr_defined_later
+// OBJDUMP-NEXT: 0000 2b000000 2c000000 00000000 00000000
+// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0030 00f02fe4 811f007f 000c0000 00000000
+// expr_defined
+// OBJDUMP-NEXT: 0040 2a000000 2b000000 00000000 00000000
+// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0070 00f02fe4 811f007f 000c0000 00000000
+
+.text
+// ASM: .text
+
+.amdhsa_code_object_version 4
+// ASM: .amdhsa_code_object_version 4
+
+.p2align 8
+.type expr_defined_later,@function
+expr_defined_later:
+ s_endpgm
+
+.p2align 8
+.type expr_defined,@function
+expr_defined:
+ s_endpgm
+
+.rodata
+// ASM: .rodata
+
+.p2align 6
+.amdhsa_kernel expr_defined_later
+ .amdhsa_group_segment_fixed_size defined_value+2
+ .amdhsa_private_segment_fixed_size defined_value+3
+ .amdhsa_system_vgpr_workitem_id defined_2_bits
+ .amdhsa_float_round_mode_32 defined_2_bits
+ .amdhsa_float_round_mode_16_64 defined_2_bits
+ .amdhsa_float_denorm_mode_32 defined_2_bits
+ .amdhsa_float_denorm_mode_16_64 defined_2_bits
+ .amdhsa_system_sgpr_workgroup_id_x defined_boolean
+ .amdhsa_system_sgpr_workgroup_id_y defined_boolean
+ .amdhsa_system_sgpr_workgroup_id_z defined_boolean
+ .amdhsa_system_sgpr_workgroup_info defined_boolean
+ .amdhsa_fp16_overflow defined_boolean
+ .amdhsa_workgroup_processor_mode defined_boolean
+ .amdhsa_memory_ordered defined_boolean
+ .amdhsa_forward_progress defined_boolean
+ .amdhsa_exception_fp_ieee_invalid_op defined_boolean
+ .amdhsa_exception_fp_denorm_src defined_boolean
+ .amdhsa_exception_fp_ieee_div_zero defined_boolean
+ .amdhsa_exception_fp_ieee_overflow defined_boolean
+ .amdhsa_exception_fp_ieee_underflow defined_boolean
+ .amdhsa_exception_fp_ieee_inexact defined_boolean
+ .amdhsa_exception_int_div_zero defined_boolean
+ .amdhsa_round_robin_scheduling defined_boolean
+ .amdhsa_enable_private_segment defined_boolean
+ .amdhsa_uses_dynamic_stack defined_boolean
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+.end_amdhsa_kernel
+
+.set defined_value, 41
+.set defined_2_bits, 3
+.set defined_boolean, 1
+
+.p2align 6
+.amdhsa_kernel expr_defined
+ .amdhsa_group_segment_fixed_size defined_value+1
+ .amdhsa_private_segment_fixed_size defined_value+2
+ .amdhsa_system_vgpr_workitem_id defined_2_bits
+ .amdhsa_float_round_mode_32 defined_2_bits
+ .amdhsa_float_round_mode_16_64 defined_2_bits
+ .amdhsa_float_denorm_mode_32 defined_2_bits
+ .amdhsa_float_denorm_mode_16_64 defined_2_bits
+ .amdhsa_system_sgpr_workgroup_id_x defined_boolean
+ .amdhsa_system_sgpr_workgroup_id_y defined_boolean
+ .amdhsa_system_sgpr_workgroup_id_z defined_boolean
+ .amdhsa_system_sgpr_workgroup_info defined_boolean
+ .amdhsa_fp16_overflow defined_boolean
+ .amdhsa_workgroup_processor_mode defined_boolean
+ .amdhsa_memory_ordered defined_boolean
+ .amdhsa_forward_progress defined_boolean
+ .amdhsa_exception_fp_ieee_invalid_op defined_boolean
+ .amdhsa_exception_fp_denorm_src defined_boolean
+ .amdhsa_exception_fp_ieee_div_zero defined_boolean
+ .amdhsa_exception_fp_ieee_overflow defined_boolean
+ .amdhsa_exception_fp_ieee_underflow defined_boolean
+ .amdhsa_exception_fp_ieee_inexact defined_boolean
+ .amdhsa_exception_int_div_zero defined_boolean
+ .amdhsa_round_robin_scheduling defined_boolean
+ .amdhsa_enable_private_segment defined_boolean
+ .amdhsa_uses_dynamic_stack defined_boolean
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel expr_defined_later
+// ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2
+// ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3
+// ASM-NEXT: .amdhsa_kernarg_size 0
+// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&62)>>1
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&2)>>1
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&4)>>2
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&8)>>3
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&16)>>4
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&64)>>6
+// ASM-NEXT: .amdhsa_wavefront_size32 (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&1024)>>10
+// ASM-NEXT: .amdhsa_enable_private_segment (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1)>>0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&128)>>7
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&256)>>8
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&512)>>9
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1024)>>10
+// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&6144)>>11
+// ASM-NEXT: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&12288)>>12
+// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&49152)>>14
+// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&196608)>>16
+// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&786432)>>18
+// ASM-NEXT: .amdhsa_fp16_overflow (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&67108864)>>26
+// ASM-NEXT: .amdhsa_workgroup_processor_mode (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&536870912)>>29
+// ASM-NEXT: .amdhsa_memory_ordered (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&1073741824)>>30
+// ASM-NEXT: .amdhsa_forward_progress (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&2147483648)>>31
+// ASM-NEXT: .amdhsa_round_robin_scheduling (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&2097152)>>21
+// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&16777216)>>24
+// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&33554432)>>25
+// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&67108864)>>26
+// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&134217728)>>27
+// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&268435456)>>28
+// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&536870912)>>29
+// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1073741824)>>30
+// ASM-NEXT: .end_amdhsa_kernel
+
+// ASM: .set defined_value, 41
+// ASM-NEXT: .no_dead_strip defined_value
+// ASM-NEXT: .set defined_2_bits, 3
+// ASM-NEXT: .no_dead_strip defined_2_bits
+// ASM-NEXT: .set defined_boolean, 1
+// ASM-NEXT: .no_dead_strip defined_boolean
+
+// ASM: .amdhsa_kernel expr_defined
+// ASM-NEXT: .amdhsa_group_segment_fixed_size 42
+// ASM-NEXT: .amdhsa_private_segment_fixed_size 43
+// ASM-NEXT: .amdhsa_kernarg_size 0
+// ASM-NEXT: .amdhsa_user_sgpr_count 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+// ASM-NEXT: .amdhsa_wavefront_size32 1
+// ASM-NEXT: .amdhsa_enable_private_segment 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
+// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 3
+// ASM-NEXT: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM-NEXT: .amdhsa_float_round_mode_32 3
+// ASM-NEXT: .amdhsa_float_round_mode_16_64 3
+// ASM-NEXT: .amdhsa_float_denorm_mode_32 3
+// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3
+// ASM-NEXT: .amdhsa_fp16_overflow 1
+// ASM-NEXT: .amdhsa_workgroup_processor_mode 1
+// ASM-NEXT: .amdhsa_memory_ordered 1
+// ASM-NEXT: .amdhsa_forward_progress 1
+// ASM-NEXT: .amdhsa_round_robin_scheduling 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
+// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
+// ASM-NEXT: .amdhsa_exception_int_div_zero 1
+// ASM-NEXT: .end_amdhsa_kernel
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs.s
index a2764d40655bc..b7f89239160fc 100644
--- a/llvm/test/MC/AMDGPU/hsa-sym-exprs.s
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs.s
@@ -7,15 +7,15 @@
// OBJDUMP: Contents of section .rodata
// expr_defined_later
-// OBJDUMP-NEXT: 0000 2b000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000
+// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000100
+// OBJDUMP-NEXT: 0030 0000ac04 81000000 00000000 00000000
// expr_defined
-// OBJDUMP-NEXT: 0040 2d000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0040 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0070 0000ac00 80000000 00000000 00000000
+// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000100
+// OBJDUMP-NEXT: 0070 0000ac04 81000000 00000000 00000000
.text
// ASM: .text
@@ -38,31 +38,111 @@ expr_defined:
.p2align 6
.amdhsa_kernel expr_defined_later
- .amdhsa_group_segment_fixed_size defined_value+2
+ .amdhsa_system_sgpr_private_segment_wavefront_offset defined_boolean
+ .amdhsa_dx10_clamp defined_boolean
+ .amdhsa_ieee_mode defined_boolean
+ .amdhsa_fp16_overflow defined_boolean
+ .amdhsa_tg_split defined_boolean
.amdhsa_next_free_vgpr 0
.amdhsa_next_free_sgpr 0
.amdhsa_accum_offset 4
.end_amdhsa_kernel
-.set defined_value, 41
+.set defined_boolean, 1
.p2align 6
.amdhsa_kernel expr_defined
- .amdhsa_group_segment_fixed_size defined_value+4
+ .amdhsa_system_sgpr_private_segment_wavefront_offset defined_boolean
+ .amdhsa_dx10_clamp defined_boolean
+ .amdhsa_ieee_mode defined_boolean
+ .amdhsa_fp16_overflow defined_boolean
+ .amdhsa_tg_split defined_boolean
.amdhsa_next_free_vgpr 0
.amdhsa_next_free_sgpr 0
.amdhsa_accum_offset 4
.end_amdhsa_kernel
-
-
// ASM: .amdhsa_kernel expr_defined_later
-// ASM: .amdhsa_group_segment_fixed_size defined_value+2
-// ASM: .end_amdhsa_kernel
+// ASM-NEXT: .amdhsa_group_segment_fixed_size 0
+// ASM-NEXT: .amdhsa_private_segment_fixed_size 0
+// ASM-NEXT: .amdhsa_kernarg_size 0
+// ASM-NEXT: .amdhsa_user_sgpr_count (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&62)>>1
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1)>>0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&128)>>7
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&256)>>8
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&512)>>9
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1024)>>10
+// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&6144)>>11
+// ASM-NEXT: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM-NEXT: .amdhsa_accum_offset (((((((0&(~65536))|(defined_boolean<<16))&(~63))|(0<<0))&63)>>0)+1)*4
+// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
+// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&12288)>>12
+// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&49152)>>14
+// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&196608)>>16
+// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&786432)>>18
+// ASM-NEXT: .amdhsa_dx10_clamp (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&2097152)>>21
+// ASM-NEXT: .amdhsa_ieee_mode (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&8388608)>>23
+// ASM-NEXT: .amdhsa_fp16_overflow (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&67108864)>>26
+// ASM-NEXT: .amdhsa_tg_split (((((0&(~65536))|(defined_boolean<<16))&(~63))|(0<<0))&65536)>>16
+// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&16777216)>>24
+// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&33554432)>>25
+// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&67108864)>>26
+// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&134217728)>>27
+// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&268435456)>>28
+// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&536870912)>>29
+// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1073741824)>>30
+// ASM-NEXT: .end_amdhsa_kernel
-// ASM: .set defined_value, 41
-// ASM-NEXT: .no_dead_strip defined_value
+// ASM: .set defined_boolean, 1
+// ASM-NEXT: .no_dead_strip defined_boolean
// ASM: .amdhsa_kernel expr_defined
-// ASM: .amdhsa_group_segment_fixed_size 45
-// ASM: .end_amdhsa_kernel
+// ASM-NEXT: .amdhsa_group_segment_fixed_size 0
+// ASM-NEXT: .amdhsa_private_segment_fixed_size 0
+// ASM-NEXT: .amdhsa_kernarg_size 0
+// ASM-NEXT: .amdhsa_user_sgpr_count 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 0
+// ASM-NEXT: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM-NEXT: .amdhsa_accum_offset 4
+// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
+// ASM-NEXT: .amdhsa_float_round_mode_32 0
+// ASM-NEXT: .amdhsa_float_round_mode_16_64 0
+// ASM-NEXT: .amdhsa_float_denorm_mode_32 0
+// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3
+// ASM-NEXT: .amdhsa_dx10_clamp 1
+// ASM-NEXT: .amdhsa_ieee_mode 1
+// ASM-NEXT: .amdhsa_fp16_overflow 1
+// ASM-NEXT: .amdhsa_tg_split 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+// ASM-NEXT: .amdhsa_exception_fp_denorm_src 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+// ASM-NEXT: .amdhsa_exception_int_div_zero 0
+// ASM-NEXT: .end_amdhsa_kernel
>From 6c7b7f135cf9f17013d650505a057e2ef81e75c6 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Fri, 9 Feb 2024 10:52:59 +0000
Subject: [PATCH 3/3] Apply feedback: nullptr MCExpr, remove superfluous memset
---
.../MCTargetDesc/AMDGPUMCKernelDescriptor.h | 16 ++++++++--------
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 1 -
2 files changed, 8 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.h
index 1b4dc226886b1..039bcb99a6f7e 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.h
@@ -24,14 +24,14 @@ class MCContext;
namespace AMDGPU {
struct MCKernelDescriptor {
- const MCExpr *group_segment_fixed_size;
- const MCExpr *private_segment_fixed_size;
- const MCExpr *kernarg_size;
- const MCExpr *compute_pgm_rsrc3;
- const MCExpr *compute_pgm_rsrc1;
- const MCExpr *compute_pgm_rsrc2;
- const MCExpr *kernel_code_properties;
- const MCExpr *kernarg_preload;
+ const MCExpr *group_segment_fixed_size = nullptr;
+ const MCExpr *private_segment_fixed_size = nullptr;
+ const MCExpr *kernarg_size = nullptr;
+ const MCExpr *compute_pgm_rsrc3 = nullptr;
+ const MCExpr *compute_pgm_rsrc1 = nullptr;
+ const MCExpr *compute_pgm_rsrc2 = nullptr;
+ const MCExpr *kernel_code_properties = nullptr;
+ const MCExpr *kernarg_preload = nullptr;
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift,
uint32_t Mask, MCContext &Ctx);
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 646cc2897bf17..56df3f6c9c62e 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1193,7 +1193,6 @@ MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI,
IsaVersion Version = getIsaVersion(STI->getCPU());
MCKernelDescriptor KD;
- memset(&KD, 0, sizeof(KD));
const MCExpr *ZeroMCExpr = MCConstantExpr::create(0, Ctx);
const MCExpr *OneMCExpr = MCConstantExpr::create(1, Ctx);
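A minimal standalone sketch (not part of the patch) of why the memset removed above is superfluous: once every MCExpr pointer member of MCKernelDescriptor carries an in-class "= nullptr" initializer, a default-constructed descriptor is already null-initialized. The struct and stub type below are simplified stand-ins for illustration only, not the real llvm::MCExpr or header layout.

    #include <cassert>

    struct MCExprStub {}; // stand-in for llvm::MCExpr in this sketch

    struct MCKernelDescriptorSketch {
      const MCExprStub *group_segment_fixed_size = nullptr;
      const MCExprStub *private_segment_fixed_size = nullptr;
      const MCExprStub *kernarg_size = nullptr;
      const MCExprStub *compute_pgm_rsrc1 = nullptr;
    };

    int main() {
      MCKernelDescriptorSketch KD; // no memset needed; the NSDMIs null every pointer
      assert(KD.group_segment_fixed_size == nullptr);
      assert(KD.compute_pgm_rsrc1 == nullptr);
      return 0;
    }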