[llvm] [AMDGPU] MCExpr-ify MC layer kernel descriptor (PR #80855)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 6 08:28:11 PST 2024
================
@@ -302,91 +302,142 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
bool ReserveVCC, bool ReserveFlatScr) {
IsaVersion IVersion = getIsaVersion(STI.getCPU());
+ const MCAsmInfo *MAI = getContext().getAsmInfo();
OS << "\t.amdhsa_kernel " << KernelName << '\n';
-#define PRINT_FIELD(STREAM, DIRECTIVE, KERNEL_DESC, MEMBER_NAME, FIELD_NAME) \
- STREAM << "\t\t" << DIRECTIVE << " " \
- << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n';
-
- OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size
- << '\n';
- OS << "\t\t.amdhsa_private_segment_fixed_size "
- << KD.private_segment_fixed_size << '\n';
- OS << "\t\t.amdhsa_kernarg_size " << KD.kernarg_size << '\n';
-
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_count", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT);
+ auto print_field = [&](const MCExpr *Expr, uint32_t Shift, uint32_t Mask,
+ StringRef Directive) {
+ int64_t IVal;
+ OS << "\t\t" << Directive << " ";
+ const MCExpr *pgm_rsrc1_bits =
+ amdhsa::kernel_descriptor_t::bits_get(Expr, Shift, Mask, getContext());
+ if (pgm_rsrc1_bits->evaluateAsAbsolute(IVal)) {
+ OS << static_cast<uint64_t>(IVal);
+ } else {
+ pgm_rsrc1_bits->print(OS, MAI);
+ }
+ OS << '\n';
+ };
+
+ OS << "\t\t.amdhsa_group_segment_fixed_size ";
+ KD.group_segment_fixed_size->print(OS, MAI);
+ OS << '\n';
+
+ OS << "\t\t.amdhsa_private_segment_fixed_size ";
+ KD.private_segment_fixed_size->print(OS, MAI);
+ OS << '\n';
+
+ OS << "\t\t.amdhsa_kernarg_size ";
+ KD.kernarg_size->print(OS, MAI);
+ OS << '\n';
+
+ print_field(
+ KD.compute_pgm_rsrc2, amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, ".amdhsa_user_sgpr_count");
if (!hasArchitectedFlatScratch(STI))
- PRINT_FIELD(
- OS, ".amdhsa_user_sgpr_private_segment_buffer", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_queue_ptr", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_id", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
+ print_field(
+ KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
+ ".amdhsa_user_sgpr_private_segment_buffer");
+ print_field(KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR,
+ ".amdhsa_user_sgpr_dispatch_ptr");
+ print_field(KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR,
+ ".amdhsa_user_sgpr_queue_ptr");
+ print_field(
+ KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
+ ".amdhsa_user_sgpr_kernarg_segment_ptr");
+ print_field(KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID,
+ ".amdhsa_user_sgpr_dispatch_id");
if (!hasArchitectedFlatScratch(STI))
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_flat_scratch_init", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
+ print_field(
+ KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
+ ".amdhsa_user_sgpr_flat_scratch_init");
if (hasKernargPreload(STI)) {
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_preload_length ", KD,
- kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH);
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_preload_offset ", KD,
- kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET);
+ print_field(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH_SHIFT,
+ amdhsa::KERNARG_PRELOAD_SPEC_LENGTH,
+ ".amdhsa_user_sgpr_kernarg_preload_length");
+ print_field(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET_SHIFT,
+ amdhsa::KERNARG_PRELOAD_SPEC_OFFSET,
+ ".amdhsa_user_sgpr_kernarg_preload_offset");
}
- PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
+ print_field(
+ KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
+ ".amdhsa_user_sgpr_private_segment_size");
if (IVersion.Major >= 10)
- PRINT_FIELD(OS, ".amdhsa_wavefront_size32", KD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
+ print_field(KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
+ ".amdhsa_wavefront_size32");
if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
- PRINT_FIELD(OS, ".amdhsa_uses_dynamic_stack", KD, kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
- PRINT_FIELD(OS,
+ print_field(KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT,
+ amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK,
+ ".amdhsa_uses_dynamic_stack");
+ print_field(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT,
(hasArchitectedFlatScratch(STI)
? ".amdhsa_enable_private_segment"
- : ".amdhsa_system_sgpr_private_segment_wavefront_offset"),
- KD, compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
- PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
- PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
- PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
- PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_info", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
- PRINT_FIELD(OS, ".amdhsa_system_vgpr_workitem_id", KD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
+ : ".amdhsa_system_sgpr_private_segment_wavefront_offset"));
+ print_field(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X,
+ ".amdhsa_system_sgpr_workgroup_id_x");
+ print_field(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y,
+ ".amdhsa_system_sgpr_workgroup_id_y");
+ print_field(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z,
+ ".amdhsa_system_sgpr_workgroup_id_z");
+ print_field(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO,
+ ".amdhsa_system_sgpr_workgroup_info");
+ print_field(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID,
+ ".amdhsa_system_vgpr_workitem_id");
// These directives are required.
OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n';
OS << "\t\t.amdhsa_next_free_sgpr " << NextSGPR << '\n';
- if (AMDGPU::isGFX90A(STI))
- OS << "\t\t.amdhsa_accum_offset " <<
- (AMDHSA_BITS_GET(KD.compute_pgm_rsrc3,
- amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
- << '\n';
+ if (AMDGPU::isGFX90A(STI)) {
+ // MCExpr equivalent of taking the (accum_offset + 1) * 4.
+ const MCExpr *accum_bits = amdhsa::kernel_descriptor_t::bits_get(
+ KD.compute_pgm_rsrc3,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
+ accum_bits = MCBinaryExpr::createAdd(
+ accum_bits, MCConstantExpr::create(1, getContext()), getContext());
+ accum_bits = MCBinaryExpr::createMul(
+ accum_bits, MCConstantExpr::create(4, getContext()), getContext());
+ OS << "\t\t.amdhsa_accum_offset ";
+ int64_t IVal;
+ if (accum_bits->evaluateAsAbsolute(IVal)) {
+ OS << static_cast<uint64_t>(IVal);
+ } else {
+ accum_bits->print(OS, MAI);
----------------
arsenm wrote:
Why doesn't the print function take care of trying to resolve as absolute?
https://github.com/llvm/llvm-project/pull/80855
More information about the llvm-commits mailing list