[llvm] 6cfb642 - AMDGPU: Minor updates to program resource registers (#69525)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 19 09:40:23 PDT 2023
Author: Konstantin Zhuravlyov
Date: 2023-10-19T12:40:19-04:00
New Revision: 6cfb64276d47b6c24f85311ddcc80fa8d703d76b
URL: https://github.com/llvm/llvm-project/commit/6cfb64276d47b6c24f85311ddcc80fa8d703d76b
DIFF: https://github.com/llvm/llvm-project/commit/6cfb64276d47b6c24f85311ddcc80fa8d703d76b.diff
LOG: AMDGPU: Minor updates to program resource registers (#69525)
- Be explicit about which program resource register is supported by
which target
- RSRC1
- FP16_OVFL is GFX9+
- WGP_MODE is GFX10+
- MEM_ORDERED is GFX10+
- FWD_PROGRESS is GFX10+
- RSRC3
- INST_PREF_SIZE is GFX11+
- TRAP_ON_START is GFX11+
- TRAP_ON_END is GFX11+
- IMAGE_OP is GFX11+
- Do not emit GFX11+ fields when disassembling GFX10 code objects
- Tighten enforcement of reserved bits in disassembler
---------
Co-authored-by: Konstantin Zhuravlyov <kzhuravl at amd.com>
Added:
llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s
Modified:
llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s
llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s
Removed:
################################################################################
diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
index 1bd65471d3b7c90..0574f96e6e15c4f 100644
--- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
+++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
@@ -79,8 +79,21 @@ enum : uint8_t {
};
// Compute program resource register 1. Must match hardware definition.
+// GFX6+.
#define COMPUTE_PGM_RSRC1(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_ ## NAME, SHIFT, WIDTH)
+// [GFX6-GFX8].
+#define COMPUTE_PGM_RSRC1_GFX6_GFX8(NAME, SHIFT, WIDTH) \
+ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX8_ ## NAME, SHIFT, WIDTH)
+// [GFX6-GFX9].
+#define COMPUTE_PGM_RSRC1_GFX6_GFX9(NAME, SHIFT, WIDTH) \
+ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX9_ ## NAME, SHIFT, WIDTH)
+// GFX9+.
+#define COMPUTE_PGM_RSRC1_GFX9_PLUS(NAME, SHIFT, WIDTH) \
+ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX9_PLUS_ ## NAME, SHIFT, WIDTH)
+// GFX10+.
+#define COMPUTE_PGM_RSRC1_GFX10_PLUS(NAME, SHIFT, WIDTH) \
+ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
enum : int32_t {
COMPUTE_PGM_RSRC1(GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
COMPUTE_PGM_RSRC1(GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
@@ -95,11 +108,13 @@ enum : int32_t {
COMPUTE_PGM_RSRC1(ENABLE_IEEE_MODE, 23, 1),
COMPUTE_PGM_RSRC1(BULKY, 24, 1),
COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1),
- COMPUTE_PGM_RSRC1(FP16_OVFL, 26, 1), // GFX9+
- COMPUTE_PGM_RSRC1(RESERVED0, 27, 2),
- COMPUTE_PGM_RSRC1(WGP_MODE, 29, 1), // GFX10+
- COMPUTE_PGM_RSRC1(MEM_ORDERED, 30, 1), // GFX10+
- COMPUTE_PGM_RSRC1(FWD_PROGRESS, 31, 1), // GFX10+
+ COMPUTE_PGM_RSRC1_GFX6_GFX8(RESERVED0, 26, 1),
+ COMPUTE_PGM_RSRC1_GFX9_PLUS(FP16_OVFL, 26, 1),
+ COMPUTE_PGM_RSRC1(RESERVED1, 27, 2),
+ COMPUTE_PGM_RSRC1_GFX6_GFX9(RESERVED2, 29, 3),
+ COMPUTE_PGM_RSRC1_GFX10_PLUS(WGP_MODE, 29, 1),
+ COMPUTE_PGM_RSRC1_GFX10_PLUS(MEM_ORDERED, 30, 1),
+ COMPUTE_PGM_RSRC1_GFX10_PLUS(FWD_PROGRESS, 31, 1),
};
#undef COMPUTE_PGM_RSRC1
@@ -143,15 +158,24 @@ enum : int32_t {
// Compute program resource register 3 for GFX10+. Must match hardware
// definition.
+// [GFX10].
+#define COMPUTE_PGM_RSRC3_GFX10(NAME, SHIFT, WIDTH) \
+ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_ ## NAME, SHIFT, WIDTH)
+// GFX10+.
#define COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
+// GFX11+.
+#define COMPUTE_PGM_RSRC3_GFX11_PLUS(NAME, SHIFT, WIDTH) \
+ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX11_PLUS_ ## NAME, SHIFT, WIDTH)
enum : int32_t {
- COMPUTE_PGM_RSRC3_GFX10_PLUS(SHARED_VGPR_COUNT, 0, 4), // GFX10+
- COMPUTE_PGM_RSRC3_GFX10_PLUS(INST_PREF_SIZE, 4, 6), // GFX11+
- COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_START, 10, 1), // GFX11+
- COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_END, 11, 1), // GFX11+
- COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED0, 12, 19),
- COMPUTE_PGM_RSRC3_GFX10_PLUS(IMAGE_OP, 31, 1), // GFX11+
+ COMPUTE_PGM_RSRC3_GFX10_PLUS(SHARED_VGPR_COUNT, 0, 4),
+ COMPUTE_PGM_RSRC3_GFX10(RESERVED0, 4, 8),
+ COMPUTE_PGM_RSRC3_GFX11_PLUS(INST_PREF_SIZE, 4, 6),
+ COMPUTE_PGM_RSRC3_GFX11_PLUS(TRAP_ON_START, 10, 1),
+ COMPUTE_PGM_RSRC3_GFX11_PLUS(TRAP_ON_END, 11, 1),
+ COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED1, 12, 19),
+ COMPUTE_PGM_RSRC3_GFX10(RESERVED2, 31, 1),
+ COMPUTE_PGM_RSRC3_GFX11_PLUS(IMAGE_OP, 31, 1),
};
#undef COMPUTE_PGM_RSRC3_GFX10_PLUS
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index faeaa94f9733576..9e143c77b606c34 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5242,7 +5242,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
} else if (ID == ".amdhsa_fp16_overflow") {
if (IVersion.Major < 9)
return Error(IDRange.Start, "directive requires gfx9+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val,
ValRange);
} else if (ID == ".amdhsa_tg_split") {
if (!isGFX90A())
@@ -5252,17 +5252,17 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
} else if (ID == ".amdhsa_workgroup_processor_mode") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Val,
ValRange);
} else if (ID == ".amdhsa_memory_ordered") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Val,
ValRange);
} else if (ID == ".amdhsa_forward_progress") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Val,
ValRange);
} else if (ID == ".amdhsa_shared_vgpr_count") {
if (IVersion.Major < 10)
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index d74fd0b3a9ea74e..1b301ee5f49b216 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -1818,16 +1818,23 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER)
return MCDisassembler::Fail;
- PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL);
+ if (isGFX9Plus())
+ PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
- if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0)
+ if (!isGFX9Plus())
+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0)
+ return MCDisassembler::Fail;
+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED1)
return MCDisassembler::Fail;
+ if (!isGFX10Plus())
+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2)
+ return MCDisassembler::Fail;
if (isGFX10Plus()) {
PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
- COMPUTE_PGM_RSRC1_WGP_MODE);
- PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED);
- PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS);
+ COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
+ PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
+ PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
}
return MCDisassembler::Success;
}
@@ -1908,16 +1915,29 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
PRINT_PSEUDO_DIRECTIVE_COMMENT(
"SHARED_VGPR_COUNT", COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
}
- PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
- COMPUTE_PGM_RSRC3_GFX10_PLUS_INST_PREF_SIZE);
- PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
- COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START);
- PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
- COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_END);
- if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED0)
+
+ if (isGFX11Plus()) {
+ PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
+ COMPUTE_PGM_RSRC3_GFX11_PLUS_INST_PREF_SIZE);
+ PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
+ COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START);
+ PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
+ COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_END);
+ } else {
+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED0)
+ return MCDisassembler::Fail;
+ }
+
+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED1)
return MCDisassembler::Fail;
- PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
- COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START);
+
+ if (isGFX11Plus()) {
+ PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
+ COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START);
+ } else {
+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED2)
+ return MCDisassembler::Fail;
+ }
} else if (FourByteBuffer) {
return MCDisassembler::Fail;
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 6b8c03c1620d26b..70350b83849aaae 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -453,7 +453,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
if (IVersion.Major >= 9)
PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
+ amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
if (AMDGPU::isGFX90A(STI))
PRINT_FIELD(OS, ".amdhsa_tg_split", KD,
compute_pgm_rsrc3,
@@ -461,13 +461,13 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
if (IVersion.Major >= 10) {
PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD,
compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE);
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD,
compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED);
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS);
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3,
amdhsa::COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index d123b384a27d4cc..954f21b5ec49bda 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1163,10 +1163,10 @@ amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, 1);
}
if (AMDGPU::isGFX90A(*STI)) {
AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s
index 52b399e4f0c56e2..58b01031afe383e 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s
@@ -12,10 +12,6 @@
; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_kernarg_size 0
; CHECK-NEXT: ; SHARED_VGPR_COUNT 0
-; CHECK-NEXT: ; INST_PREF_SIZE 0
-; CHECK-NEXT: ; TRAP_ON_START 0
-; CHECK-NEXT: ; TRAP_ON_END 0
-; CHECK-NEXT: ; IMAGE_OP 0
; CHECK-NEXT: .amdhsa_next_free_vgpr 32
; CHECK-NEXT: .amdhsa_reserve_vcc 0
; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
@@ -69,10 +65,6 @@
; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_kernarg_size 0
; CHECK-NEXT: .amdhsa_shared_vgpr_count 0
-; CHECK-NEXT: ; INST_PREF_SIZE 0
-; CHECK-NEXT: ; TRAP_ON_START 0
-; CHECK-NEXT: ; TRAP_ON_END 0
-; CHECK-NEXT: ; IMAGE_OP 0
; CHECK-NEXT: .amdhsa_next_free_vgpr 32
; CHECK-NEXT: .amdhsa_reserve_vcc 0
; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
@@ -126,10 +118,6 @@
; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_kernarg_size 0
; CHECK-NEXT: .amdhsa_shared_vgpr_count 1
-; CHECK-NEXT: ; INST_PREF_SIZE 0
-; CHECK-NEXT: ; TRAP_ON_START 0
-; CHECK-NEXT: ; TRAP_ON_END 0
-; CHECK-NEXT: ; IMAGE_OP 0
; CHECK-NEXT: .amdhsa_next_free_vgpr 32
; CHECK-NEXT: .amdhsa_reserve_vcc 0
; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
@@ -183,10 +171,6 @@
; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_kernarg_size 0
; CHECK-NEXT: .amdhsa_shared_vgpr_count 1
-; CHECK-NEXT: ; INST_PREF_SIZE 0
-; CHECK-NEXT: ; TRAP_ON_START 0
-; CHECK-NEXT: ; TRAP_ON_END 0
-; CHECK-NEXT: ; IMAGE_OP 0
; CHECK-NEXT: .amdhsa_next_free_vgpr 32
; CHECK-NEXT: .amdhsa_reserve_vcc 0
; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s
new file mode 100644
index 000000000000000..2133002908d9fc8
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s
@@ -0,0 +1,220 @@
+;; Test disassembly for gfx11 kernel descriptor.
+
+; RUN: rm -rf %t && split-file %s %t && cd %t
+
+;--- 1.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=+wavefrontsize32,-wavefrontsize64 -filetype=obj -mcpu=gfx1100 < 1.s > 1.o
+; RUN: llvm-objdump --disassemble-symbols=kernel.kd 1.o | tail -n +7 | tee 1-disasm.s | FileCheck 1.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=+wavefrontsize32,-wavefrontsize64 -filetype=obj -mcpu=gfx1100 < 1-disasm.s > 1-disasm.o
+; RUN: cmp 1.o 1-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: ; SHARED_VGPR_COUNT 0
+; CHECK-NEXT: ; INST_PREF_SIZE 0
+; CHECK-NEXT: ; TRAP_ON_START 0
+; CHECK-NEXT: ; TRAP_ON_END 0
+; CHECK-NEXT: ; IMAGE_OP 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 32
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_workgroup_processor_mode 1
+; CHECK-NEXT: .amdhsa_memory_ordered 1
+; CHECK-NEXT: .amdhsa_forward_progress 0
+; CHECK-NEXT: .amdhsa_enable_private_segment 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_wavefront_size32 1
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
+ .amdhsa_next_free_vgpr 32
+ .amdhsa_next_free_sgpr 32
+ .amdhsa_wavefront_size32 1
+.end_amdhsa_kernel
+
+;--- 2.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1100 < 2.s > 2.o
+; RUN: llvm-objdump --disassemble-symbols=kernel.kd 2.o | tail -n +7 | tee 2-disasm.s | FileCheck 2.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1100 < 2-disasm.s > 2-disasm.o
+; RUN: cmp 2.o 2-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_shared_vgpr_count 0
+; CHECK-NEXT: ; INST_PREF_SIZE 0
+; CHECK-NEXT: ; TRAP_ON_START 0
+; CHECK-NEXT: ; TRAP_ON_END 0
+; CHECK-NEXT: ; IMAGE_OP 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 32
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_workgroup_processor_mode 1
+; CHECK-NEXT: .amdhsa_memory_ordered 1
+; CHECK-NEXT: .amdhsa_forward_progress 0
+; CHECK-NEXT: .amdhsa_enable_private_segment 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_wavefront_size32 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
+ .amdhsa_next_free_vgpr 32
+ .amdhsa_next_free_sgpr 32
+ .amdhsa_shared_vgpr_count 0
+.end_amdhsa_kernel
+
+;--- 3.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1100 < 3.s > 3.o
+; RUN: llvm-objdump --disassemble-symbols=kernel.kd 3.o | tail -n +7 | tee 3-disasm.s | FileCheck 3.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1100 < 3-disasm.s > 3-disasm.o
+; RUN: cmp 3.o 3-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_shared_vgpr_count 1
+; CHECK-NEXT: ; INST_PREF_SIZE 0
+; CHECK-NEXT: ; TRAP_ON_START 0
+; CHECK-NEXT: ; TRAP_ON_END 0
+; CHECK-NEXT: ; IMAGE_OP 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 32
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_workgroup_processor_mode 1
+; CHECK-NEXT: .amdhsa_memory_ordered 1
+; CHECK-NEXT: .amdhsa_forward_progress 0
+; CHECK-NEXT: .amdhsa_enable_private_segment 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_wavefront_size32 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
+ .amdhsa_next_free_vgpr 32
+ .amdhsa_next_free_sgpr 32
+ .amdhsa_shared_vgpr_count 1
+.end_amdhsa_kernel
+
+;--- 4.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1100 < 4.s > 4.o
+; RUN: llvm-objdump --disassemble-symbols=kernel.kd 4.o | tail -n +7 | tee 4-disasm.s | FileCheck 4.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1100 < 4-disasm.s > 4-disasm.o
+; RUN: cmp 4.o 4-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_shared_vgpr_count 1
+; CHECK-NEXT: ; INST_PREF_SIZE 0
+; CHECK-NEXT: ; TRAP_ON_START 0
+; CHECK-NEXT: ; TRAP_ON_END 0
+; CHECK-NEXT: ; IMAGE_OP 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 32
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_workgroup_processor_mode 1
+; CHECK-NEXT: .amdhsa_memory_ordered 1
+; CHECK-NEXT: .amdhsa_forward_progress 0
+; CHECK-NEXT: .amdhsa_enable_private_segment 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_wavefront_size32 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
+ .amdhsa_next_free_vgpr 32
+ .amdhsa_next_free_sgpr 32
+ .amdhsa_shared_vgpr_count 1
+ .amdhsa_wavefront_size32 0
+.end_amdhsa_kernel
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s
index 04cf28f89e44826..39cef4da4278df2 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s
@@ -26,10 +26,6 @@
; OBJDUMP-NEXT: .amdhsa_private_segment_fixed_size 0
; OBJDUMP-NEXT: .amdhsa_kernarg_size 0
; OBJDUMP-NEXT: .amdhsa_shared_vgpr_count 0
-; OBJDUMP-NEXT: ; INST_PREF_SIZE 0
-; OBJDUMP-NEXT: ; TRAP_ON_START 0
-; OBJDUMP-NEXT: ; TRAP_ON_END 0
-; OBJDUMP-NEXT: ; IMAGE_OP 0
; OBJDUMP-NEXT: .amdhsa_next_free_vgpr 8
; OBJDUMP-NEXT: .amdhsa_reserve_vcc 0
; OBJDUMP-NEXT: .amdhsa_reserve_flat_scratch 0
More information about the llvm-commits
mailing list