[llvm] [AMDGPU] gfx1250 kernel descriptor update (PR #155008)
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 22 11:25:22 PDT 2025
https://github.com/rampitec created https://github.com/llvm/llvm-project/pull/155008
None
>From 149ec1208a81b6857a98d41cf8bc89d24f61a782 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Fri, 22 Aug 2025 11:23:08 -0700
Subject: [PATCH] [AMDGPU] gfx1250 kernel descriptor update
---
llvm/docs/AMDGPUUsage.rst | 81 ++++-
.../llvm/Support/AMDHSAKernelDescriptor.h | 39 ++-
.../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 24 +-
.../Disassembler/AMDGPUDisassembler.cpp | 45 ++-
.../MCTargetDesc/AMDGPUTargetStreamer.cpp | 14 +-
llvm/test/MC/AMDGPU/hsa-gfx1250-v4.s | 323 ++++++++++++++++++
.../AMDGPU/kernel-descriptor-rsrc-errors.test | 7 +-
.../llvm-objdump/ELF/AMDGPU/kd-gfx1250.s | 121 +++++++
8 files changed, 610 insertions(+), 44 deletions(-)
create mode 100644 llvm/test/MC/AMDGPU/hsa-gfx1250-v4.s
create mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx1250.s
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index f7a847ec7f38f..b6d61a62f50ff 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -5405,7 +5405,21 @@ The fields used by CP for code objects before V3 also match those specified in
Used by CP to set up
``COMPUTE_PGM_RSRC1.FP16_OVFL``.
- 28:27 2 bits Reserved, must be 0.
+ 27 1 bit RESERVED GFX6-GFX120*
+ Reserved, must be 0.
+ FLAT_SCRATCH_IS_NV GFX125*
+ 0 - Use the NV ISA as indication
+ that scratch is NV. 1 - Force
+ scratch to NV = 1, even if
+ ISA.NV == 0 if the address falls
+ into scratch space (not global).
+ This allows global.NV = 0 and
+ scratch.NV = 1 for flat ops. Other
+ threads use the ISA bit value.
+
+ Used by CP to set up
+ ``COMPUTE_PGM_RSRC1.FLAT_SCRATCH_IS_NV``.
+ 28 1 bit RESERVED Reserved, must be 0.
29 1 bit WGP_MODE GFX6-GFX9
Reserved, must be 0.
GFX10-GFX12
@@ -5487,15 +5501,16 @@ The fields used by CP for code objects before V3 also match those specified in
Used by CP to set up
``COMPUTE_PGM_RSRC2.SCRATCH_EN``.
- 5:1 5 bits USER_SGPR_COUNT The total number of SGPR
- user data
- registers requested. This
- number must be greater than
- or equal to the number of user
- data registers enabled.
+ 5:1 5 bits USER_SGPR_COUNT GFX6-GFX120*
+ The total number of SGPR
+ user data
+ registers requested. This
+ number must be greater than
+ or equal to the number of user
+ data registers enabled.
- Used by CP to set up
- ``COMPUTE_PGM_RSRC2.USER_SGPR``.
+ Used by CP to set up
+ ``COMPUTE_PGM_RSRC2.USER_SGPR``.
6 1 bit ENABLE_TRAP_HANDLER GFX6-GFX11
Must be 0.
@@ -5504,8 +5519,25 @@ The fields used by CP for code objects before V3 also match those specified in
which is set by the CP if
the runtime has installed a
trap handler.
- GFX12
- Reserved, must be 0.
+ ENABLE_DYNAMIC_VGPR GFX120*
+ Enables dynamic VGPR mode, where
+ each wave allocates one VGPR chunk
+ at launch and can request for
+ additional space to use during
+ execution in SQ.
+
+ Used by CP to set up
+ ``COMPUTE_PGM_RSRC2.DYNAMIC_VGPR``.
+ 6:1 6 bits USER_SGPR_COUNT GFX125*
+ The total number of SGPR
+ user data
+ registers requested. This
+ number must be greater than
+ or equal to the number of user
+ data registers enabled.
+
+ Used by CP to set up
+ ``COMPUTE_PGM_RSRC2.USER_SGPR``.
7 1 bit ENABLE_SGPR_WORKGROUP_ID_X Enable the setup of the
system SGPR register for
the work-group id in the X
@@ -5598,7 +5630,7 @@ The fields used by CP for code objects before V3 also match those specified in
GFX6
roundup(lds-size / (64 * 4))
- GFX7-GFX11
+ GFX7-GFX12
roundup(lds-size / (128 * 4))
GFX950
roundup(lds-size / (320 * 4))
@@ -5722,7 +5754,30 @@ The fields used by CP for code objects before V3 also match those specified in
with a granularity of 128 bytes.
12 1 bit RESERVED Reserved, must be 0.
13 1 bit GLG_EN If 1, group launch guarantee will be enabled for this dispatch
- 30:14 17 bits RESERVED Reserved, must be 0.
+ 16:14 3 bits RESERVED GFX120*
+ Reserved, must be 0.
+ NAMED_BAR_CNT GFX125*
+ Number of named barriers to alloc for each workgroup, in granularity of
+ 4. Range is from 0-4 allocating 0, 4, 8, 12, 16.
+ 17 1 bit RESERVED GFX120*
+ Reserved, must be 0.
+ ENABLE_DYNAMIC_VGPR GFX125*
+ Enables dynamic VGPR mode, where each wave allocates one VGPR chunk
+ at launch and can request for additional space to use during
+ execution in SQ.
+
+ Used by CP to set up ``COMPUTE_PGM_RSRC3.DYNAMIC_VGPR``.
+ 20:18 3 bits RESERVED GFX120*
+ Reserved, must be 0.
+ TCP_SPLIT GFX125*
+ Desired LDS/VC split of TCP. 0: no preference 1: LDS=0, VC=448kB
+ 2: LDS=64kB, VC=384kB 3: LDS=128kB, VC=320kB 4: LDS=192kB, VC=256kB
+ 5: LDS=256kB, VC=192kB 6: LDS=320kB, VC=128kB 7: LDS=384kB, VC=64kB
+ 21 1 bit RESERVED GFX120*
+ Reserved, must be 0.
+ ENABLE_DIDT_THROTTLE GFX125*
+ Enable DIDT throttling for all ACE pipes
+ 30:22 9 bits RESERVED Reserved, must be 0.
31 1 bit IMAGE_OP If 1, the kernel execution contains image instructions. If executed as
part of a graphics pipeline, image read instructions will stall waiting
for any necessary ``WAIT_SYNC`` fence to be performed in order to
diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
index 78f38ed5a9d4b..60bc4dd5d1a13 100644
--- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
+++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
@@ -94,6 +94,9 @@ enum : uint8_t {
// [GFX6-GFX11].
#define COMPUTE_PGM_RSRC1_GFX6_GFX11(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX11_##NAME, SHIFT, WIDTH)
+// [GFX6-GFX120].
+#define COMPUTE_PGM_RSRC1_GFX6_GFX120(NAME, SHIFT, WIDTH) \
+ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX120_ ## NAME, SHIFT, WIDTH)
// GFX9+.
#define COMPUTE_PGM_RSRC1_GFX9_PLUS(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX9_PLUS_ ## NAME, SHIFT, WIDTH)
@@ -103,6 +106,9 @@ enum : uint8_t {
// GFX12+.
#define COMPUTE_PGM_RSRC1_GFX12_PLUS(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX12_PLUS_##NAME, SHIFT, WIDTH)
+// [GFX125].
+#define COMPUTE_PGM_RSRC1_GFX125(NAME, SHIFT, WIDTH) \
+ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX125_##NAME, SHIFT, WIDTH)
enum : int32_t {
COMPUTE_PGM_RSRC1(GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
COMPUTE_PGM_RSRC1(GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
@@ -121,8 +127,10 @@ enum : int32_t {
COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1),
COMPUTE_PGM_RSRC1_GFX6_GFX8(RESERVED0, 26, 1),
COMPUTE_PGM_RSRC1_GFX9_PLUS(FP16_OVFL, 26, 1),
- COMPUTE_PGM_RSRC1(RESERVED1, 27, 2),
- COMPUTE_PGM_RSRC1_GFX6_GFX9(RESERVED2, 29, 3),
+ COMPUTE_PGM_RSRC1_GFX6_GFX120(RESERVED1, 27, 1),
+ COMPUTE_PGM_RSRC1_GFX125(FLAT_SCRATCH_IS_NV, 27, 1),
+ COMPUTE_PGM_RSRC1(RESERVED2, 28, 1),
+ COMPUTE_PGM_RSRC1_GFX6_GFX9(RESERVED3, 29, 3),
COMPUTE_PGM_RSRC1_GFX10_PLUS(WGP_MODE, 29, 1),
COMPUTE_PGM_RSRC1_GFX10_PLUS(MEM_ORDERED, 30, 1),
COMPUTE_PGM_RSRC1_GFX10_PLUS(FWD_PROGRESS, 31, 1),
@@ -136,14 +144,24 @@ enum : int32_t {
// [GFX6-GFX11].
#define COMPUTE_PGM_RSRC2_GFX6_GFX11(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_GFX6_GFX11_##NAME, SHIFT, WIDTH)
+// [GFX6-GFX120].
+#define COMPUTE_PGM_RSRC2_GFX6_GFX120(NAME, SHIFT, WIDTH) \
+ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_GFX6_GFX120_ ## NAME, SHIFT, WIDTH)
// GFX12+.
#define COMPUTE_PGM_RSRC2_GFX12_PLUS(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_GFX12_PLUS_##NAME, SHIFT, WIDTH)
+// [GFX120].
+#define COMPUTE_PGM_RSRC2_GFX120(NAME, SHIFT, WIDTH) \
+ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_GFX120_ ## NAME, SHIFT, WIDTH)
+// [GFX125].
+#define COMPUTE_PGM_RSRC2_GFX125(NAME, SHIFT, WIDTH) \
+ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_GFX125_##NAME, SHIFT, WIDTH)
enum : int32_t {
COMPUTE_PGM_RSRC2(ENABLE_PRIVATE_SEGMENT, 0, 1),
- COMPUTE_PGM_RSRC2(USER_SGPR_COUNT, 1, 5),
+ COMPUTE_PGM_RSRC2_GFX6_GFX120(USER_SGPR_COUNT, 1, 5),
COMPUTE_PGM_RSRC2_GFX6_GFX11(ENABLE_TRAP_HANDLER, 6, 1),
- COMPUTE_PGM_RSRC2_GFX12_PLUS(RESERVED1, 6, 1),
+ COMPUTE_PGM_RSRC2_GFX120(ENABLE_DYNAMIC_VGPR, 6, 1),
+ COMPUTE_PGM_RSRC2_GFX125(USER_SGPR_COUNT, 1, 6),
COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_X, 7, 1),
COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1),
COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1),
@@ -178,8 +196,8 @@ enum : int32_t {
// Compute program resource register 3 for GFX10+. Must match hardware
// definition.
// GFX10+.
-#define COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) \
- AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
+#define COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) \
+ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_PLUS_##NAME, SHIFT, WIDTH)
// [GFX10].
#define COMPUTE_PGM_RSRC3_GFX10(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_##NAME, SHIFT, WIDTH)
@@ -212,10 +230,13 @@ enum : int32_t {
COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED2, 12, 1),
COMPUTE_PGM_RSRC3_GFX10_GFX11(RESERVED3, 13, 1),
COMPUTE_PGM_RSRC3_GFX12_PLUS(GLG_EN, 13, 1),
- COMPUTE_PGM_RSRC3_GFX10_GFX120(RESERVED4, 14, 3),
+ COMPUTE_PGM_RSRC3_GFX10_GFX120(RESERVED4, 14, 8),
COMPUTE_PGM_RSRC3_GFX125(NAMED_BAR_CNT, 14, 3),
- COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED5, 17, 14),
- COMPUTE_PGM_RSRC3_GFX10(RESERVED5, 31, 1),
+ COMPUTE_PGM_RSRC3_GFX125(ENABLE_DYNAMIC_VGPR, 17, 1),
+ COMPUTE_PGM_RSRC3_GFX125(TCP_SPLIT, 18, 3),
+ COMPUTE_PGM_RSRC3_GFX125(ENABLE_DIDT_THROTTLE, 21, 1),
+ COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED5, 22, 9),
+ COMPUTE_PGM_RSRC3_GFX10(RESERVED6, 31, 1),
COMPUTE_PGM_RSRC3_GFX11_PLUS(IMAGE_OP, 31, 1),
};
#undef COMPUTE_PGM_RSRC3_GFX10_PLUS
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 78a2678808eee..2e21ba4c30b53 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -6410,12 +6410,24 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
return TokError("amdgpu_user_sgpr_count smaller than than implied by "
"enabled user SGPRs");
- if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
- return TokError("too many user SGPRs enabled");
- AMDGPU::MCKernelDescriptor::bits_set(
- KD.compute_pgm_rsrc2, MCConstantExpr::create(UserSGPRCount, getContext()),
- COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
- COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
+ if (isGFX1250()) {
+ if (!isUInt<COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
+ return TokError("too many user SGPRs enabled");
+ AMDGPU::MCKernelDescriptor::bits_set(
+ KD.compute_pgm_rsrc2,
+ MCConstantExpr::create(UserSGPRCount, getContext()),
+ COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
+ COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());
+ } else {
+ if (!isUInt<COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_WIDTH>(
+ UserSGPRCount))
+ return TokError("too many user SGPRs enabled");
+ AMDGPU::MCKernelDescriptor::bits_set(
+ KD.compute_pgm_rsrc2,
+ MCConstantExpr::create(UserSGPRCount, getContext()),
+ COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
+ COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());
+ }
int64_t IVal = 0;
if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 4b891e48ff273..6a2beeed41dfd 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -2284,24 +2284,38 @@ Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
- if (isGFX9Plus())
+ // Bits [26].
+ if (isGFX9Plus()) {
PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
-
- if (!isGFX9Plus())
+ } else {
CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
"COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
+ }
- CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED1, "COMPUTE_PGM_RSRC1");
+ // Bits [27].
+ if (isGFX1250()) {
+ PRINT_PSEUDO_DIRECTIVE_COMMENT("FLAT_SCRATCH_IS_NV",
+ COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);
+ } else {
+ CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX120_RESERVED1,
+ "COMPUTE_PGM_RSRC1");
+ }
- if (!isGFX10Plus())
- CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2,
- "COMPUTE_PGM_RSRC1", "must be zero pre-gfx10");
+ // Bits [28].
+ CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED2, "COMPUTE_PGM_RSRC1");
+ // Bits [29-31].
if (isGFX10Plus()) {
- PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
- COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
+ // WGP_MODE is not available on GFX1250.
+ if (!isGFX1250()) {
+ PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
+ COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
+ }
PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
+ } else {
+ CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED3,
+ "COMPUTE_PGM_RSRC1");
}
if (isGFX12Plus())
@@ -2423,17 +2437,24 @@ Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
"must be zero on gfx10 or gfx11");
}
- // Bits [14-16]
+ // Bits [14-21].
if (isGFX1250()) {
PRINT_DIRECTIVE(".amdhsa_named_barrier_count",
COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
+ PRINT_PSEUDO_DIRECTIVE_COMMENT(
+ "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);
+ PRINT_PSEUDO_DIRECTIVE_COMMENT("TCP_SPLIT",
+ COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);
+ PRINT_PSEUDO_DIRECTIVE_COMMENT(
+ "ENABLE_DIDT_THROTTLE",
+ COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);
} else {
CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX120_RESERVED4,
"COMPUTE_PGM_RSRC3",
"must be zero on gfx10+");
}
- // Bits [17-30].
+ // Bits [22-30].
CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED5,
"COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
@@ -2442,7 +2463,7 @@ Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
} else {
- CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED5,
+ CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED6,
"COMPUTE_PGM_RSRC3",
"must be zero on gfx10");
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index b58ba947c72e2..0bbab29dbda18 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -396,9 +396,17 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
EmitMCExpr(KD.kernarg_size);
OS << '\n';
- PrintField(
- KD.compute_pgm_rsrc2, amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
- amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, ".amdhsa_user_sgpr_count");
+ if (isGFX1250(STI)) {
+ PrintField(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT,
+ ".amdhsa_user_sgpr_count");
+ } else {
+ PrintField(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT,
+ ".amdhsa_user_sgpr_count");
+ }
if (!hasArchitectedFlatScratch(STI))
PrintField(
diff --git a/llvm/test/MC/AMDGPU/hsa-gfx1250-v4.s b/llvm/test/MC/AMDGPU/hsa-gfx1250-v4.s
new file mode 100644
index 0000000000000..3cd1d877600e2
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/hsa-gfx1250-v4.s
@@ -0,0 +1,323 @@
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1250 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1250 --amdhsa-code-object-version=4 -filetype=obj < %s > %t
+// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s
+// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
+
+// READOBJ: Section Headers
+// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
+// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} 000540 {{[0-9a-f]+}} {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64
+
+// READOBJ: Relocation section '.rela.rodata' at offset
+// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
+// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
+// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210
+// READOBJ: 00000000000000d0 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 310
+// READOBJ: 0000000000000110 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 410
+
+// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries:
+// READOBJ: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal
+// READOBJ-NEXT: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete
+// READOBJ-NEXT: 0000000000000200 0 FUNC LOCAL PROTECTED 2 special_sgpr
+// READOBJ-NEXT: 0000000000000300 0 FUNC LOCAL PROTECTED 2 disabled_user_sgpr
+// READOBJ-NEXT: 0000000000000400 0 FUNC LOCAL PROTECTED 2 max_lds_size
+// READOBJ-NEXT: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd
+// READOBJ-NEXT: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd
+// READOBJ-NEXT: 0000000000000080 64 OBJECT LOCAL DEFAULT 3 special_sgpr.kd
+// READOBJ-NEXT: 00000000000000c0 64 OBJECT LOCAL DEFAULT 3 disabled_user_sgpr.kd
+// READOBJ-NEXT: 0000000000000100 64 OBJECT LOCAL DEFAULT 3 max_lds_size.kd
+
+// OBJDUMP: Contents of section .rodata
+// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
+// minimal
+// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0030 00000cc0 80000000 00040000 00000000
+// complete
+// OBJDUMP-NEXT: 0040 01000000 01000000 0c000000 00000000
+// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00c00000
+// OBJDUMP-NEXT: 0070 015021c4 410f007f 5e068200 00000000
+// special_sgpr
+// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 00b0 000000c0 80000000 00040000 00000000
+// disabled_user_sgpr
+// OBJDUMP-NEXT: 00c0 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 00d0 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 00e0 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 00f0 00000cc0 80000000 00040000 00000000
+// max_lds_size
+// OBJDUMP-NEXT: 0100 00000600 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0110 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0120 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0130 00000cc0 80000000 00040000 00000000
+
+.text
+
+.amdgcn_target "amdgcn-amd-amdhsa--gfx1250"
+// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx1250"
+
+.p2align 8
+.type minimal, at function
+minimal:
+ s_endpgm
+
+.p2align 8
+.type complete, at function
+complete:
+ s_endpgm
+
+.p2align 8
+.type special_sgpr, at function
+special_sgpr:
+ s_endpgm
+
+.p2align 8
+.type disabled_user_sgpr, at function
+disabled_user_sgpr:
+ s_endpgm
+
+.p2align 8
+.type max_lds_size, at function
+max_lds_size:
+ s_endpgm
+
+.rodata
+// ASM: .rodata
+
+// Test that only specifying required directives is allowed, and that defaulted
+// values are omitted.
+.p2align 6
+.amdhsa_kernel minimal
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel minimal
+// ASM: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM: .end_amdhsa_kernel
+
+// Test that we can specify all available directives with non-default values.
+.p2align 6
+.amdhsa_kernel complete
+ .amdhsa_group_segment_fixed_size 1
+ .amdhsa_private_segment_fixed_size 1
+ .amdhsa_kernarg_size 12
+ .amdhsa_user_sgpr_count 32
+ .amdhsa_user_sgpr_dispatch_ptr 1
+ .amdhsa_user_sgpr_queue_ptr 1
+ .amdhsa_user_sgpr_kernarg_segment_ptr 1
+ .amdhsa_user_sgpr_dispatch_id 1
+ .amdhsa_user_sgpr_kernarg_preload_length 2
+ .amdhsa_user_sgpr_kernarg_preload_offset 1
+ .amdhsa_user_sgpr_private_segment_size 1
+ .amdhsa_uses_cu_stores 1
+ .amdhsa_wavefront_size32 1
+ .amdhsa_enable_private_segment 1
+ .amdhsa_system_sgpr_workgroup_id_x 0
+ .amdhsa_system_sgpr_workgroup_id_y 1
+ .amdhsa_system_sgpr_workgroup_id_z 1
+ .amdhsa_system_sgpr_workgroup_info 1
+ .amdhsa_system_vgpr_workitem_id 1
+ .amdhsa_next_free_vgpr 9
+ .amdhsa_next_free_sgpr 27
+ .amdhsa_named_barrier_count 3
+ .amdhsa_reserve_vcc 0
+ .amdhsa_float_round_mode_32 1
+ .amdhsa_float_round_mode_16_64 1
+ .amdhsa_float_denorm_mode_32 1
+ .amdhsa_float_denorm_mode_16_64 0
+ .amdhsa_fp16_overflow 1
+ .amdhsa_memory_ordered 1
+ .amdhsa_forward_progress 1
+ .amdhsa_round_robin_scheduling 1
+ .amdhsa_exception_fp_ieee_invalid_op 1
+ .amdhsa_exception_fp_denorm_src 1
+ .amdhsa_exception_fp_ieee_div_zero 1
+ .amdhsa_exception_fp_ieee_overflow 1
+ .amdhsa_exception_fp_ieee_underflow 1
+ .amdhsa_exception_fp_ieee_inexact 1
+ .amdhsa_exception_int_div_zero 1
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel complete
+// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
+// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
+// ASM-NEXT: .amdhsa_kernarg_size 12
+// ASM-NEXT: .amdhsa_user_sgpr_count 32
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length 2
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 1
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
+// ASM-NEXT: .amdhsa_uses_cu_stores 1
+// ASM-NEXT: .amdhsa_wavefront_size32 1
+// ASM-NEXT: .amdhsa_enable_private_segment 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
+// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1
+// ASM-NEXT: .amdhsa_next_free_vgpr 9
+// ASM-NEXT: .amdhsa_next_free_sgpr 32
+// ASM-NEXT: .amdhsa_named_barrier_count 3
+// ASM-NEXT: .amdhsa_reserve_vcc 0
+// ASM-NEXT: .amdhsa_float_round_mode_32 1
+// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
+// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
+// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0
+// ASM-NEXT: .amdhsa_fp16_overflow 1
+// ASM-NEXT: .amdhsa_memory_ordered 1
+// ASM-NEXT: .amdhsa_forward_progress 1
+// ASM-NEXT: .amdhsa_inst_pref_size 0
+// ASM-NEXT: .amdhsa_round_robin_scheduling 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
+// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
+// ASM-NEXT: .amdhsa_exception_int_div_zero 1
+// ASM-NEXT: .end_amdhsa_kernel
+
+// Test that we are including special SGPR usage in the granulated count.
+.p2align 6
+.amdhsa_kernel special_sgpr
+ .amdhsa_next_free_sgpr 27
+
+ .amdhsa_reserve_vcc 0
+
+ .amdhsa_float_denorm_mode_16_64 0
+ .amdhsa_next_free_vgpr 0
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel special_sgpr
+// ASM: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 27
+// ASM-NEXT: .amdhsa_named_barrier_count 0
+// ASM-NEXT: .amdhsa_reserve_vcc 0
+// ASM: .amdhsa_float_denorm_mode_16_64 0
+// ASM: .end_amdhsa_kernel
+
+// Test that explicitly disabling user_sgpr's does not affect the user_sgpr
+// count, i.e. this should produce the same descriptor as minimal.
+.p2align 6
+.amdhsa_kernel disabled_user_sgpr
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel disabled_user_sgpr
+// ASM: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM: .end_amdhsa_kernel
+
+.p2align 6
+.amdhsa_kernel max_lds_size
+ .amdhsa_group_segment_fixed_size 393216
+ .amdhsa_next_free_vgpr 1
+ .amdhsa_next_free_sgpr 1
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel max_lds_size
+// ASM: .amdhsa_group_segment_fixed_size 393216
+// ASM: .end_amdhsa_kernel
+
+.section .foo
+
+.byte .amdgcn.gfx_generation_number
+// ASM: .byte 12
+
+.byte .amdgcn.gfx_generation_minor
+// ASM: .byte 5
+
+.byte .amdgcn.gfx_generation_stepping
+// ASM: .byte 0
+
+.byte .amdgcn.next_free_vgpr
+// ASM: .byte 0
+.byte .amdgcn.next_free_sgpr
+// ASM: .byte 0
+
+v_mov_b32_e32 v16, s3
+
+.byte .amdgcn.next_free_vgpr
+// ASM: .byte 17
+.byte .amdgcn.next_free_sgpr
+// ASM: .byte 4
+
+.set .amdgcn.next_free_vgpr, 0
+.set .amdgcn.next_free_sgpr, 0
+
+.byte .amdgcn.next_free_vgpr
+// ASM: .byte 0
+.byte .amdgcn.next_free_sgpr
+// ASM: .byte 0
+
+v_mov_b32_e32 v16, s3
+
+.byte .amdgcn.next_free_vgpr
+// ASM: .byte 17
+.byte .amdgcn.next_free_sgpr
+// ASM: .byte 4
+
+// Metadata
+
+.amdgpu_metadata
+ amdhsa.version:
+ - 3
+ - 0
+ amdhsa.kernels:
+ - .name: amd_kernel_code_t_test_all
+ .symbol: amd_kernel_code_t_test_all at kd
+ .kernarg_segment_size: 8
+ .group_segment_fixed_size: 16
+ .private_segment_fixed_size: 32
+ .kernarg_segment_align: 64
+ .wavefront_size: 128
+ .sgpr_count: 14
+ .vgpr_count: 1024
+ .max_flat_workgroup_size: 256
+ - .name: amd_kernel_code_t_minimal
+ .symbol: amd_kernel_code_t_minimal at kd
+ .kernarg_segment_size: 8
+ .group_segment_fixed_size: 16
+ .private_segment_fixed_size: 32
+ .kernarg_segment_align: 64
+ .wavefront_size: 128
+ .sgpr_count: 14
+ .vgpr_count: 40
+ .max_flat_workgroup_size: 256
+.end_amdgpu_metadata
+
+// ASM: .amdgpu_metadata
+// ASM: amdhsa.kernels:
+// ASM: - .group_segment_fixed_size: 16
+// ASM: .kernarg_segment_align: 64
+// ASM: .kernarg_segment_size: 8
+// ASM: .max_flat_workgroup_size: 256
+// ASM: .name: amd_kernel_code_t_test_all
+// ASM: .private_segment_fixed_size: 32
+// ASM: .sgpr_count: 14
+// ASM: .symbol: 'amd_kernel_code_t_test_all at kd'
+// ASM: .vgpr_count: 1024
+// ASM: .wavefront_size: 128
+// ASM: - .group_segment_fixed_size: 16
+// ASM: .kernarg_segment_align: 64
+// ASM: .kernarg_segment_size: 8
+// ASM: .max_flat_workgroup_size: 256
+// ASM: .name: amd_kernel_code_t_minimal
+// ASM: .private_segment_fixed_size: 32
+// ASM: .sgpr_count: 14
+// ASM: .symbol: 'amd_kernel_code_t_minimal at kd'
+// ASM: .vgpr_count: 40
+// ASM: .wavefront_size: 128
+// ASM: amdhsa.version:
+// ASM-NEXT: - 3
+// ASM-NEXT: - 0
+// ASM: .end_amdgpu_metadata
diff --git a/llvm/test/MC/Disassembler/AMDGPU/kernel-descriptor-rsrc-errors.test b/llvm/test/MC/Disassembler/AMDGPU/kernel-descriptor-rsrc-errors.test
index ad7bba076002f..be92aade6b531 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/kernel-descriptor-rsrc-errors.test
+++ b/llvm/test/MC/Disassembler/AMDGPU/kernel-descriptor-rsrc-errors.test
@@ -39,9 +39,14 @@
# RUN: yaml2obj %s -DGPU=GFX1100 -DSRC1=0300AC60 -DSRC2=80000000 -DSRC3=00000100 \
# RUN: | llvm-objdump --disassemble-symbols=test.kd - | FileCheck %s --check-prefix=RSRC3_10
-# RSRC3_10: ; error decoding test.kd: kernel descriptor COMPUTE_PGM_RSRC3 reserved bits in range (16:14) set, must be zero on gfx10+
+# RSRC3_10: ; error decoding test.kd: kernel descriptor COMPUTE_PGM_RSRC3 reserved bits in range (21:14) set, must be zero on gfx10+
# RSRC3_10-NEXT: ; decoding failed region as bytes
+# RUN: yaml2obj %s -DGPU=GFX1100 -DSRC1=0300AC60 -DSRC2=80000000 -DSRC3=00004000 \
+# RUN: | llvm-objdump --disassemble-symbols=test.kd - | FileCheck %s --check-prefix=RSRC3_10_1
+# RSRC3_10_1: ; error decoding test.kd: kernel descriptor COMPUTE_PGM_RSRC3 reserved bits in range (30:22) set, must be zero on gfx10+
+# RSRC3_10_1-NEXT: ; decoding failed region as bytes
+
# RUN: yaml2obj %s -DGPU=GFX801 -DSRC1=0300AC60 -DSRC2=80000000 -DSRC3=00000001 \
# RUN: | llvm-objdump --disassemble-symbols=test.kd - | FileCheck %s --check-prefix=RSRC3_PRE_9
# RSRC3_PRE_9: ; error decoding test.kd: kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx1250.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx1250.s
new file mode 100644
index 0000000000000..99a4df3e5adfb
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx1250.s
@@ -0,0 +1,121 @@
+;; Test disassembly for gfx1250 kernel descriptor.
+
+; RUN: rm -rf %t && split-file %s %t && cd %t
+
+;--- 1.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -filetype=obj -mcpu=gfx1250 < 1.s > 1.o
+; RUN: llvm-objdump --disassemble-symbols=kernel.kd 1.o | tail -n +7 | tee 1-disasm.s | FileCheck 1.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -filetype=obj -mcpu=gfx1250 < 1-disasm.s > 1-disasm.o
+; RUN: cmp 1.o 1-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_inst_pref_size 0
+; CHECK-NEXT: ; GLG_EN 0
+; CHECK-NEXT: .amdhsa_named_barrier_count 0
+; CHECK-NEXT: ; ENABLE_DYNAMIC_VGPR 0
+; CHECK-NEXT: ; TCP_SPLIT 0
+; CHECK-NEXT: ; ENABLE_DIDT_THROTTLE 0
+; CHECK-NEXT: ; IMAGE_OP 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 32
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: ; FLAT_SCRATCH_IS_NV 0
+; CHECK-NEXT: .amdhsa_memory_ordered 1
+; CHECK-NEXT: .amdhsa_forward_progress 1
+; CHECK-NEXT: .amdhsa_round_robin_scheduling 0
+; CHECK-NEXT: .amdhsa_enable_private_segment 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_uses_cu_stores 1
+; CHECK-NEXT: .amdhsa_wavefront_size32 1
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
+ .amdhsa_next_free_vgpr 32
+ .amdhsa_next_free_sgpr 32
+ .amdhsa_inst_pref_size 0
+ .amdhsa_uses_cu_stores 1
+.end_amdhsa_kernel
+
+;--- 2.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -filetype=obj -mcpu=gfx1250 < 2.s > 2.o
+; RUN: llvm-objdump --disassemble-symbols=kernel.kd 2.o | tail -n +7 | tee 2-disasm.s | FileCheck 2.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -filetype=obj -mcpu=gfx1250 < 2-disasm.s > 2-disasm.o
+; RUN: cmp 2.o 2-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 393216
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_inst_pref_size 63
+; CHECK-NEXT: ; GLG_EN 0
+; CHECK-NEXT: .amdhsa_named_barrier_count 7
+; CHECK-NEXT: ; ENABLE_DYNAMIC_VGPR 0
+; CHECK-NEXT: ; TCP_SPLIT 0
+; CHECK-NEXT: ; ENABLE_DIDT_THROTTLE 0
+; CHECK-NEXT: ; IMAGE_OP 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 32
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: ; FLAT_SCRATCH_IS_NV 0
+; CHECK-NEXT: .amdhsa_memory_ordered 1
+; CHECK-NEXT: .amdhsa_forward_progress 1
+; CHECK-NEXT: .amdhsa_round_robin_scheduling 0
+; CHECK-NEXT: .amdhsa_enable_private_segment 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_uses_cu_stores 0
+; CHECK-NEXT: .amdhsa_wavefront_size32 1
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
+ .amdhsa_group_segment_fixed_size 393216
+ .amdhsa_next_free_vgpr 32
+ .amdhsa_next_free_sgpr 32
+ .amdhsa_named_barrier_count 7
+ .amdhsa_uses_cu_stores 0
+ .amdhsa_inst_pref_size 63
+.end_amdhsa_kernel
More information about the llvm-commits
mailing list