[llvm] 986001c - [AMDGPU] Improve assembler + disassembler handling of kernel descriptors
Scott Linder via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 6 14:21:02 PDT 2023
Author: Scott Linder
Date: 2023-07-06T21:20:51Z
New Revision: 986001c8274a3f31c3849c16d68ee36a04809986
URL: https://github.com/llvm/llvm-project/commit/986001c8274a3f31c3849c16d68ee36a04809986
DIFF: https://github.com/llvm/llvm-project/commit/986001c8274a3f31c3849c16d68ee36a04809986.diff
LOG: [AMDGPU] Improve assembler + disassembler handling of kernel descriptors
* Relax the AsmParser to accept `.amdhsa_wavefront_size32 0` when the
`.amdhsa_shared_vgpr_count` directive is present.
* Teach the KD disassembler to respect the setting of
KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32 when calculating the
value of `.amdhsa_next_free_vgpr`.
* Teach the KD disassembler to disassemble COMPUTE_PGM_RSRC3 for gfx90a
and gfx10+.
* Include "pseudo directive" comments for gfx10 fields which are not
controlled by any assembler directive.
* Fix disassembleObject failure diagnostic in llvm-objdump to not
hard-code a comment string, and to follow the convention of not
capitalizing the first sentence.
Reviewed By: rochauha
Differential Revision: https://reviews.llvm.org/D128014
Added:
llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s
llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx90a.s
Modified:
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s
llvm/tools/llvm-objdump/llvm-objdump.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index e040feb260a329..9035f6f0c6a305 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5237,7 +5237,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (IVersion.Major >= 10) {
// SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
- if (SharedVGPRCount && EnableWavefrontSize32) {
+ if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
return TokError("shared_vgpr_count directive not valid on "
"wavefront size 32");
}
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 123de0f38df934..54b52eb4f324bf 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -45,11 +45,9 @@ using namespace llvm;
using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
- MCContext &Ctx,
- MCInstrInfo const *MCII) :
- MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
- TargetMaxInstBytes(Ctx.getAsmInfo()->getMaxInstLength(&STI)) {
-
+ MCContext &Ctx, MCInstrInfo const *MCII)
+ : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
+ MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)) {
// ToDo: AMDGPUDisassembler supports only VI ISA.
if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
report_fatal_error("Disassembly not yet supported for subtarget");
@@ -1632,6 +1630,11 @@ bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
do { \
KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
} while (0)
+#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
+ do { \
+ KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
+ << GET_FIELD(MASK) << '\n'; \
+ } while (0)
// NOLINTNEXTLINE(readability-identifier-naming)
MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
@@ -1647,8 +1650,9 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
uint32_t GranulatedWorkitemVGPRCount =
GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
- uint32_t NextFreeVGPR = (GranulatedWorkitemVGPRCount + 1) *
- AMDGPU::IsaInfo::getVGPREncodingGranule(&STI);
+ uint32_t NextFreeVGPR =
+ (GranulatedWorkitemVGPRCount + 1) *
+ AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
@@ -1786,11 +1790,40 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
using namespace amdhsa;
- if (!isGFX10Plus() && FourByteBuffer) {
+ StringRef Indent = "\t";
+ if (isGFX90A()) {
+ KdStream << Indent << ".amdhsa_accum_offset "
+ << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
+ << '\n';
+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED0)
+ return MCDisassembler::Fail;
+ PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED1)
+ return MCDisassembler::Fail;
+ } else if (isGFX10Plus()) {
+ if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
+ PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
+ COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
+ } else {
+ PRINT_PSEUDO_DIRECTIVE_COMMENT(
+ "SHARED_VGPR_COUNT", COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
+ }
+ PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
+ COMPUTE_PGM_RSRC3_GFX10_PLUS_INST_PREF_SIZE);
+ PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
+ COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START);
+ PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
+ COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_END);
+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED0)
+ return MCDisassembler::Fail;
+ PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
+ COMPUTE_PGM_RSRC3_GFX10_PLUS_IMAGE_OP);
+ } else if (FourByteBuffer) {
return MCDisassembler::Fail;
}
return MCDisassembler::Success;
}
+#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
#undef PRINT_DIRECTIVE
#undef GET_FIELD
@@ -1935,6 +1968,20 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor(
if (Bytes.size() != 64 || KdAddress % 64 != 0)
return MCDisassembler::Fail;
+ // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
+ // requires us to know the setting of .amdhsa_wavefront_size32 in order to
+ // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
+ // order. Work around this by first looking up .amdhsa_wavefront_size32 here
+ // when required.
+ if (isGFX10Plus()) {
+ uint16_t KernelCodeProperties =
+ support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
+ support::endianness::little);
+ EnableWavefrontSize32 =
+ AMDHSA_BITS_GET(KernelCodeProperties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
+ }
+
std::string Kd;
raw_string_ostream KdStream(Kd);
KdStream << ".amdhsa_kernel " << KdName << '\n';
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 7a307f6f2476f0..444312473a5ff4 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -25,6 +25,7 @@
namespace llvm {
+class MCAsmInfo;
class MCInst;
class MCOperand;
class MCSubtargetInfo;
@@ -92,10 +93,12 @@ class AMDGPUDisassembler : public MCDisassembler {
private:
std::unique_ptr<MCInstrInfo const> const MCII;
const MCRegisterInfo &MRI;
+ const MCAsmInfo &MAI;
const unsigned TargetMaxInstBytes;
mutable ArrayRef<uint8_t> Bytes;
mutable uint32_t Literal;
mutable bool HasLiteral;
+ mutable std::optional<bool> EnableWavefrontSize32;
public:
AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s
new file mode 100644
index 00000000000000..52b399e4f0c56e
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s
@@ -0,0 +1,232 @@
+;; Test disassembly for gfx10 kernel descriptor.
+
+; RUN: rm -rf %t && split-file %s %t && cd %t
+
+;--- 1.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack,+wavefrontsize32,-wavefrontsize64 -filetype=obj -mcpu=gfx1010 < 1.s > 1.o
+; RUN: llvm-objdump --disassemble-symbols=kernel.kd 1.o | tail -n +7 | tee 1-disasm.s | FileCheck 1.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack,+wavefrontsize32,-wavefrontsize64 -filetype=obj -mcpu=gfx1010 < 1-disasm.s > 1-disasm.o
+; RUN: cmp 1.o 1-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: ; SHARED_VGPR_COUNT 0
+; CHECK-NEXT: ; INST_PREF_SIZE 0
+; CHECK-NEXT: ; TRAP_ON_START 0
+; CHECK-NEXT: ; TRAP_ON_END 0
+; CHECK-NEXT: ; IMAGE_OP 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 32
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_workgroup_processor_mode 1
+; CHECK-NEXT: .amdhsa_memory_ordered 1
+; CHECK-NEXT: .amdhsa_forward_progress 0
+; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_wavefront_size32 1
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
+ .amdhsa_next_free_vgpr 32
+ .amdhsa_next_free_sgpr 32
+ .amdhsa_wavefront_size32 1
+.end_amdhsa_kernel
+
+;--- 2.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack,+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1010 < 2.s > 2.o
+; RUN: llvm-objdump --disassemble-symbols=kernel.kd 2.o | tail -n +7 | tee 2-disasm.s | FileCheck 2.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack,+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1010 < 2-disasm.s > 2-disasm.o
+; RUN: cmp 2.o 2-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_shared_vgpr_count 0
+; CHECK-NEXT: ; INST_PREF_SIZE 0
+; CHECK-NEXT: ; TRAP_ON_START 0
+; CHECK-NEXT: ; TRAP_ON_END 0
+; CHECK-NEXT: ; IMAGE_OP 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 32
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_workgroup_processor_mode 1
+; CHECK-NEXT: .amdhsa_memory_ordered 1
+; CHECK-NEXT: .amdhsa_forward_progress 0
+; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_wavefront_size32 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
+ .amdhsa_next_free_vgpr 32
+ .amdhsa_next_free_sgpr 32
+ .amdhsa_shared_vgpr_count 0
+.end_amdhsa_kernel
+
+;--- 3.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack,+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1010 < 3.s > 3.o
+; RUN: llvm-objdump --disassemble-symbols=kernel.kd 3.o | tail -n +7 | tee 3-disasm.s | FileCheck 3.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack,+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1010 < 3-disasm.s > 3-disasm.o
+; RUN: cmp 3.o 3-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_shared_vgpr_count 1
+; CHECK-NEXT: ; INST_PREF_SIZE 0
+; CHECK-NEXT: ; TRAP_ON_START 0
+; CHECK-NEXT: ; TRAP_ON_END 0
+; CHECK-NEXT: ; IMAGE_OP 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 32
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_workgroup_processor_mode 1
+; CHECK-NEXT: .amdhsa_memory_ordered 1
+; CHECK-NEXT: .amdhsa_forward_progress 0
+; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_wavefront_size32 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
+ .amdhsa_next_free_vgpr 32
+ .amdhsa_next_free_sgpr 32
+ .amdhsa_shared_vgpr_count 1
+.end_amdhsa_kernel
+
+;--- 4.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack,+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1010 < 4.s > 4.o
+; RUN: llvm-objdump --disassemble-symbols=kernel.kd 4.o | tail -n +7 | tee 4-disasm.s | FileCheck 4.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack,+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1010 < 4-disasm.s > 4-disasm.o
+; RUN: cmp 4.o 4-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_shared_vgpr_count 1
+; CHECK-NEXT: ; INST_PREF_SIZE 0
+; CHECK-NEXT: ; TRAP_ON_START 0
+; CHECK-NEXT: ; TRAP_ON_END 0
+; CHECK-NEXT: ; IMAGE_OP 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 32
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_workgroup_processor_mode 1
+; CHECK-NEXT: .amdhsa_memory_ordered 1
+; CHECK-NEXT: .amdhsa_forward_progress 0
+; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_wavefront_size32 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
+ .amdhsa_next_free_vgpr 32
+ .amdhsa_next_free_sgpr 32
+ .amdhsa_shared_vgpr_count 1
+ .amdhsa_wavefront_size32 0
+.end_amdhsa_kernel
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx90a.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx90a.s
new file mode 100644
index 00000000000000..42d87dee734d32
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx90a.s
@@ -0,0 +1,153 @@
+;; Test disassembly for gfx90a kernel descriptor.
+
+; RUN: rm -rf %t && split-file %s %t && cd %t
+
+;--- 1.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack -filetype=obj -mcpu=gfx90a < 1.s > 1.o
+; RUN: llvm-objdump --disassemble-symbols=kernel.kd 1.o | tail -n +7 | tee 1-disasm.s | FileCheck 1.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack -filetype=obj -mcpu=gfx90a < 1-disasm.s > 1-disasm.o
+; RUN: cmp 1.o 1-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_accum_offset 4
+; CHECK-NEXT: .amdhsa_tg_split 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 8
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+.end_amdhsa_kernel
+
+;--- 2.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack -filetype=obj -mcpu=gfx90a < 2.s > 2.o
+; RUN: llvm-objdump --disassemble-symbols=kernel.kd 2.o | tail -n +7 | tee 2-disasm.s | FileCheck 2.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack -filetype=obj -mcpu=gfx90a < 2-disasm.s > 2-disasm.o
+; RUN: cmp 2.o 2-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_accum_offset 8
+; CHECK-NEXT: .amdhsa_tg_split 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 32
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
+ .amdhsa_next_free_vgpr 32
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 8
+.end_amdhsa_kernel
+
+;--- 3.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack -filetype=obj -mcpu=gfx90a < 3.s > 3.o
+; RUN: llvm-objdump --disassemble-symbols=kernel.kd 3.o | tail -n +7 | tee 3-disasm.s | FileCheck 3.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=-xnack -filetype=obj -mcpu=gfx90a < 3-disasm.s > 3-disasm.o
+; RUN: cmp 3.o 3-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_accum_offset 12
+; CHECK-NEXT: .amdhsa_tg_split 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 32
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
+ .amdhsa_next_free_vgpr 32
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 12
+.end_amdhsa_kernel
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s
index 761e0162c6b5d4..04cf28f89e4482 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s
@@ -1,7 +1,7 @@
;; Entirely zeroed kernel descriptor (for GFX10).
; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-xnack -filetype=obj -o %t
-; RUN: llvm-objdump -s -j .text %t | FileCheck --check-prefix=OBJDUMP %s
+; RUN: llvm-objdump -s -d -j .text %t | FileCheck --check-prefix=OBJDUMP %s
;; TODO:
;; This file and kd-zeroed-raw.s should produce the same output for the kernel
@@ -11,10 +11,62 @@
;; Check the raw bytes right now.
-; OBJDUMP: 0000 00000000 00000000 00000000 00000000
+; OBJDUMP-LABEL: Contents of section .text:
+; OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
; OBJDUMP-NEXT: 0030 01000000 00000000 00000000 00000000
+; OBJDUMP-EMPTY:
+
+; OBJDUMP-LABEL: Disassembly of section .text:
+; OBJDUMP-EMPTY:
+; OBJDUMP-NEXT: 0000000000000000 <my_kernel.kd>:
+; OBJDUMP-NEXT: .amdhsa_kernel my_kernel
+; OBJDUMP-NEXT: .amdhsa_group_segment_fixed_size 0
+; OBJDUMP-NEXT: .amdhsa_private_segment_fixed_size 0
+; OBJDUMP-NEXT: .amdhsa_kernarg_size 0
+; OBJDUMP-NEXT: .amdhsa_shared_vgpr_count 0
+; OBJDUMP-NEXT: ; INST_PREF_SIZE 0
+; OBJDUMP-NEXT: ; TRAP_ON_START 0
+; OBJDUMP-NEXT: ; TRAP_ON_END 0
+; OBJDUMP-NEXT: ; IMAGE_OP 0
+; OBJDUMP-NEXT: .amdhsa_next_free_vgpr 8
+; OBJDUMP-NEXT: .amdhsa_reserve_vcc 0
+; OBJDUMP-NEXT: .amdhsa_reserve_flat_scratch 0
+; OBJDUMP-NEXT: .amdhsa_reserve_xnack_mask 0
+; OBJDUMP-NEXT: .amdhsa_next_free_sgpr 8
+; OBJDUMP-NEXT: .amdhsa_float_round_mode_32 0
+; OBJDUMP-NEXT: .amdhsa_float_round_mode_16_64 0
+; OBJDUMP-NEXT: .amdhsa_float_denorm_mode_32 0
+; OBJDUMP-NEXT: .amdhsa_float_denorm_mode_16_64 0
+; OBJDUMP-NEXT: .amdhsa_dx10_clamp 0
+; OBJDUMP-NEXT: .amdhsa_ieee_mode 0
+; OBJDUMP-NEXT: .amdhsa_fp16_overflow 0
+; OBJDUMP-NEXT: .amdhsa_workgroup_processor_mode 0
+; OBJDUMP-NEXT: .amdhsa_memory_ordered 0
+; OBJDUMP-NEXT: .amdhsa_forward_progress 0
+; OBJDUMP-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+; OBJDUMP-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
+; OBJDUMP-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; OBJDUMP-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; OBJDUMP-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; OBJDUMP-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; OBJDUMP-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; OBJDUMP-NEXT: .amdhsa_exception_fp_denorm_src 0
+; OBJDUMP-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; OBJDUMP-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; OBJDUMP-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; OBJDUMP-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; OBJDUMP-NEXT: .amdhsa_exception_int_div_zero 0
+; OBJDUMP-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+; OBJDUMP-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; OBJDUMP-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; OBJDUMP-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; OBJDUMP-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; OBJDUMP-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+; OBJDUMP-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; OBJDUMP-NEXT: .amdhsa_wavefront_size32 0
+; OBJDUMP-NEXT: .end_amdhsa_kernel
.amdhsa_kernel my_kernel
.amdhsa_group_segment_fixed_size 0
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index d77b0006049e98..bce76eea48f837 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -1718,8 +1718,9 @@ static void disassembleObject(const Target *TheTarget, ObjectFile &Obj,
// distance to the next symbol, and sometimes it will be just a
// prologue and we should start disassembling instructions from where
// it left off.
- outs() << "// Error in decoding " << SymNamesHere[SHI]
- << " : Decoding failed region as bytes.\n";
+ outs() << Ctx.getAsmInfo()->getCommentString()
+ << " error in decoding " << SymNamesHere[SHI]
+ << " : decoding failed region as bytes.\n";
for (uint64_t I = 0; I < Size; ++I) {
outs() << "\t.byte\t " << format_hex(Bytes[I], 1, /*Upper=*/true)
<< "\n";
More information about the llvm-commits
mailing list