[llvm] [AMDGPU] gfx1250 kernel descriptor update (PR #155008)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 22 11:26:37 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-binary-utilities
@llvm/pr-subscribers-llvm-support
Author: Stanislav Mekhanoshin (rampitec)
<details>
<summary>Changes</summary>
---
Patch is 38.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155008.diff
8 Files Affected:
- (modified) llvm/docs/AMDGPUUsage.rst (+68-13)
- (modified) llvm/include/llvm/Support/AMDHSAKernelDescriptor.h (+30-9)
- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+18-6)
- (modified) llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (+33-12)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp (+11-3)
- (added) llvm/test/MC/AMDGPU/hsa-gfx1250-v4.s (+323)
- (modified) llvm/test/MC/Disassembler/AMDGPU/kernel-descriptor-rsrc-errors.test (+6-1)
- (added) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx1250.s (+121)
``````````diff
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index f7a847ec7f38f..b6d61a62f50ff 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -5405,7 +5405,21 @@ The fields used by CP for code objects before V3 also match those specified in
Used by CP to set up
``COMPUTE_PGM_RSRC1.FP16_OVFL``.
- 28:27 2 bits Reserved, must be 0.
+ 27 1 bit RESERVED GFX6-GFX120*
+ Reserved, must be 0.
+ FLAT_SCRATCH_IS_NV GFX125*
+ 0 - Use the NV ISA as indication
+ that scratch is NV. 1 - Force
+ scratch to NV = 1, even if
+ ISA.NV == 0 if the address falls
+ into scratch space (not global).
+ This allows global.NV = 0 and
+ scratch.NV = 1 for flat ops. Other
+ threads use the ISA bit value.
+
+ Used by CP to set up
+ ``COMPUTE_PGM_RSRC1.FLAT_SCRATCH_IS_NV``.
+ 28 1 bit RESERVED Reserved, must be 0.
29 1 bit WGP_MODE GFX6-GFX9
Reserved, must be 0.
GFX10-GFX12
@@ -5487,15 +5501,16 @@ The fields used by CP for code objects before V3 also match those specified in
Used by CP to set up
``COMPUTE_PGM_RSRC2.SCRATCH_EN``.
- 5:1 5 bits USER_SGPR_COUNT The total number of SGPR
- user data
- registers requested. This
- number must be greater than
- or equal to the number of user
- data registers enabled.
+ 5:1 5 bits USER_SGPR_COUNT GFX6-GFX120*
+ The total number of SGPR
+ user data
+ registers requested. This
+ number must be greater than
+ or equal to the number of user
+ data registers enabled.
- Used by CP to set up
- ``COMPUTE_PGM_RSRC2.USER_SGPR``.
+ Used by CP to set up
+ ``COMPUTE_PGM_RSRC2.USER_SGPR``.
6 1 bit ENABLE_TRAP_HANDLER GFX6-GFX11
Must be 0.
@@ -5504,8 +5519,25 @@ The fields used by CP for code objects before V3 also match those specified in
which is set by the CP if
the runtime has installed a
trap handler.
- GFX12
- Reserved, must be 0.
+ ENABLE_DYNAMIC_VGPR GFX120*
+ Enables dynamic VGPR mode, where
+ each wave allocates one VGPR chunk
+ at launch and can request
+ additional space to use during
+ execution in SQ.
+
+ Used by CP to set up
+ ``COMPUTE_PGM_RSRC2.DYNAMIC_VGPR``.
+ 6:1 6 bits USER_SGPR_COUNT GFX125*
+ The total number of SGPR
+ user data
+ registers requested. This
+ number must be greater than
+ or equal to the number of user
+ data registers enabled.
+
+ Used by CP to set up
+ ``COMPUTE_PGM_RSRC2.USER_SGPR``.
7 1 bit ENABLE_SGPR_WORKGROUP_ID_X Enable the setup of the
system SGPR register for
the work-group id in the X
@@ -5598,7 +5630,7 @@ The fields used by CP for code objects before V3 also match those specified in
GFX6
roundup(lds-size / (64 * 4))
- GFX7-GFX11
+ GFX7-GFX12
roundup(lds-size / (128 * 4))
GFX950
roundup(lds-size / (320 * 4))
@@ -5722,7 +5754,30 @@ The fields used by CP for code objects before V3 also match those specified in
with a granularity of 128 bytes.
12 1 bit RESERVED Reserved, must be 0.
13 1 bit GLG_EN If 1, group launch guarantee will be enabled for this dispatch
- 30:14 17 bits RESERVED Reserved, must be 0.
+ 16:14 3 bits RESERVED GFX120*
+ Reserved, must be 0.
+ NAMED_BAR_CNT GFX125*
+ Number of named barriers to allocate for each workgroup, in granularity of
+ 4. Valid values are 0-4, allocating 0, 4, 8, 12, or 16 named barriers.
+ 17 1 bit RESERVED GFX120*
+ Reserved, must be 0.
+ ENABLE_DYNAMIC_VGPR GFX125*
+ Enables dynamic VGPR mode, where each wave allocates one VGPR chunk
+ at launch and can request additional space to use during
+ execution in SQ.
+
+ Used by CP to set up ``COMPUTE_PGM_RSRC3.DYNAMIC_VGPR``.
+ 20:18 3 bits RESERVED GFX120*
+ Reserved, must be 0.
+ TCP_SPLIT GFX125*
+ Desired LDS/VC split of TCP. 0: no preference 1: LDS=0, VC=448kB
+ 2: LDS=64kB, VC=384kB 3: LDS=128kB, VC=320kB 4: LDS=192kB, VC=256kB
+ 5: LDS=256kB, VC=192kB 6: LDS=320kB, VC=128kB 7: LDS=384kB, VC=64kB
+ 21 1 bit RESERVED GFX120*
+ Reserved, must be 0.
+ ENABLE_DIDT_THROTTLE GFX125*
+ Enable DIDT throttling for all ACE pipes
+ 30:22 9 bits RESERVED Reserved, must be 0.
31 1 bit IMAGE_OP If 1, the kernel execution contains image instructions. If executed as
part of a graphics pipeline, image read instructions will stall waiting
for any necessary ``WAIT_SYNC`` fence to be performed in order to
diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
index 78f38ed5a9d4b..60bc4dd5d1a13 100644
--- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
+++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
@@ -94,6 +94,9 @@ enum : uint8_t {
// [GFX6-GFX11].
#define COMPUTE_PGM_RSRC1_GFX6_GFX11(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX11_##NAME, SHIFT, WIDTH)
+// [GFX6-GFX120].
+#define COMPUTE_PGM_RSRC1_GFX6_GFX120(NAME, SHIFT, WIDTH) \
+ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX120_ ## NAME, SHIFT, WIDTH)
// GFX9+.
#define COMPUTE_PGM_RSRC1_GFX9_PLUS(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX9_PLUS_ ## NAME, SHIFT, WIDTH)
@@ -103,6 +106,9 @@ enum : uint8_t {
// GFX12+.
#define COMPUTE_PGM_RSRC1_GFX12_PLUS(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX12_PLUS_##NAME, SHIFT, WIDTH)
+// [GFX125].
+#define COMPUTE_PGM_RSRC1_GFX125(NAME, SHIFT, WIDTH) \
+ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX125_##NAME, SHIFT, WIDTH)
enum : int32_t {
COMPUTE_PGM_RSRC1(GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
COMPUTE_PGM_RSRC1(GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
@@ -121,8 +127,10 @@ enum : int32_t {
COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1),
COMPUTE_PGM_RSRC1_GFX6_GFX8(RESERVED0, 26, 1),
COMPUTE_PGM_RSRC1_GFX9_PLUS(FP16_OVFL, 26, 1),
- COMPUTE_PGM_RSRC1(RESERVED1, 27, 2),
- COMPUTE_PGM_RSRC1_GFX6_GFX9(RESERVED2, 29, 3),
+ COMPUTE_PGM_RSRC1_GFX6_GFX120(RESERVED1, 27, 1),
+ COMPUTE_PGM_RSRC1_GFX125(FLAT_SCRATCH_IS_NV, 27, 1),
+ COMPUTE_PGM_RSRC1(RESERVED2, 28, 1),
+ COMPUTE_PGM_RSRC1_GFX6_GFX9(RESERVED3, 29, 3),
COMPUTE_PGM_RSRC1_GFX10_PLUS(WGP_MODE, 29, 1),
COMPUTE_PGM_RSRC1_GFX10_PLUS(MEM_ORDERED, 30, 1),
COMPUTE_PGM_RSRC1_GFX10_PLUS(FWD_PROGRESS, 31, 1),
@@ -136,14 +144,24 @@ enum : int32_t {
// [GFX6-GFX11].
#define COMPUTE_PGM_RSRC2_GFX6_GFX11(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_GFX6_GFX11_##NAME, SHIFT, WIDTH)
+// [GFX6-GFX120].
+#define COMPUTE_PGM_RSRC2_GFX6_GFX120(NAME, SHIFT, WIDTH) \
+ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_GFX6_GFX120_ ## NAME, SHIFT, WIDTH)
// GFX12+.
#define COMPUTE_PGM_RSRC2_GFX12_PLUS(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_GFX12_PLUS_##NAME, SHIFT, WIDTH)
+// [GFX120].
+#define COMPUTE_PGM_RSRC2_GFX120(NAME, SHIFT, WIDTH) \
+ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_GFX120_ ## NAME, SHIFT, WIDTH)
+// [GFX125].
+#define COMPUTE_PGM_RSRC2_GFX125(NAME, SHIFT, WIDTH) \
+ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_GFX125_##NAME, SHIFT, WIDTH)
enum : int32_t {
COMPUTE_PGM_RSRC2(ENABLE_PRIVATE_SEGMENT, 0, 1),
- COMPUTE_PGM_RSRC2(USER_SGPR_COUNT, 1, 5),
+ COMPUTE_PGM_RSRC2_GFX6_GFX120(USER_SGPR_COUNT, 1, 5),
COMPUTE_PGM_RSRC2_GFX6_GFX11(ENABLE_TRAP_HANDLER, 6, 1),
- COMPUTE_PGM_RSRC2_GFX12_PLUS(RESERVED1, 6, 1),
+ COMPUTE_PGM_RSRC2_GFX120(ENABLE_DYNAMIC_VGPR, 6, 1),
+ COMPUTE_PGM_RSRC2_GFX125(USER_SGPR_COUNT, 1, 6),
COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_X, 7, 1),
COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1),
COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1),
@@ -178,8 +196,8 @@ enum : int32_t {
// Compute program resource register 3 for GFX10+. Must match hardware
// definition.
// GFX10+.
-#define COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) \
- AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
+#define COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) \
+ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_PLUS_##NAME, SHIFT, WIDTH)
// [GFX10].
#define COMPUTE_PGM_RSRC3_GFX10(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_##NAME, SHIFT, WIDTH)
@@ -212,10 +230,13 @@ enum : int32_t {
COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED2, 12, 1),
COMPUTE_PGM_RSRC3_GFX10_GFX11(RESERVED3, 13, 1),
COMPUTE_PGM_RSRC3_GFX12_PLUS(GLG_EN, 13, 1),
- COMPUTE_PGM_RSRC3_GFX10_GFX120(RESERVED4, 14, 3),
+ COMPUTE_PGM_RSRC3_GFX10_GFX120(RESERVED4, 14, 8),
COMPUTE_PGM_RSRC3_GFX125(NAMED_BAR_CNT, 14, 3),
- COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED5, 17, 14),
- COMPUTE_PGM_RSRC3_GFX10(RESERVED5, 31, 1),
+ COMPUTE_PGM_RSRC3_GFX125(ENABLE_DYNAMIC_VGPR, 17, 1),
+ COMPUTE_PGM_RSRC3_GFX125(TCP_SPLIT, 18, 3),
+ COMPUTE_PGM_RSRC3_GFX125(ENABLE_DIDT_THROTTLE, 21, 1),
+ COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED5, 22, 9),
+ COMPUTE_PGM_RSRC3_GFX10(RESERVED6, 31, 1),
COMPUTE_PGM_RSRC3_GFX11_PLUS(IMAGE_OP, 31, 1),
};
#undef COMPUTE_PGM_RSRC3_GFX10_PLUS
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 78a2678808eee..2e21ba4c30b53 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -6410,12 +6410,24 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
return TokError("amdgpu_user_sgpr_count smaller than than implied by "
"enabled user SGPRs");
- if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
- return TokError("too many user SGPRs enabled");
- AMDGPU::MCKernelDescriptor::bits_set(
- KD.compute_pgm_rsrc2, MCConstantExpr::create(UserSGPRCount, getContext()),
- COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
- COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
+ if (isGFX1250()) {
+ if (!isUInt<COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
+ return TokError("too many user SGPRs enabled");
+ AMDGPU::MCKernelDescriptor::bits_set(
+ KD.compute_pgm_rsrc2,
+ MCConstantExpr::create(UserSGPRCount, getContext()),
+ COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
+ COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());
+ } else {
+ if (!isUInt<COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_WIDTH>(
+ UserSGPRCount))
+ return TokError("too many user SGPRs enabled");
+ AMDGPU::MCKernelDescriptor::bits_set(
+ KD.compute_pgm_rsrc2,
+ MCConstantExpr::create(UserSGPRCount, getContext()),
+ COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
+ COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());
+ }
int64_t IVal = 0;
if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 4b891e48ff273..6a2beeed41dfd 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -2284,24 +2284,38 @@ Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
- if (isGFX9Plus())
+ // Bits [26].
+ if (isGFX9Plus()) {
PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
-
- if (!isGFX9Plus())
+ } else {
CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
"COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
+ }
- CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED1, "COMPUTE_PGM_RSRC1");
+ // Bits [27].
+ if (isGFX1250()) {
+ PRINT_PSEUDO_DIRECTIVE_COMMENT("FLAT_SCRATCH_IS_NV",
+ COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);
+ } else {
+ CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX120_RESERVED1,
+ "COMPUTE_PGM_RSRC1");
+ }
- if (!isGFX10Plus())
- CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2,
- "COMPUTE_PGM_RSRC1", "must be zero pre-gfx10");
+ // Bits [28].
+ CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED2, "COMPUTE_PGM_RSRC1");
+ // Bits [29-31].
if (isGFX10Plus()) {
- PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
- COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
+ // WGP_MODE is not available on GFX1250.
+ if (!isGFX1250()) {
+ PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
+ COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
+ }
PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
+ } else {
+ CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED3,
+ "COMPUTE_PGM_RSRC1");
}
if (isGFX12Plus())
@@ -2423,17 +2437,24 @@ Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
"must be zero on gfx10 or gfx11");
}
- // Bits [14-16]
+ // Bits [14-21].
if (isGFX1250()) {
PRINT_DIRECTIVE(".amdhsa_named_barrier_count",
COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
+ PRINT_PSEUDO_DIRECTIVE_COMMENT(
+ "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);
+ PRINT_PSEUDO_DIRECTIVE_COMMENT("TCP_SPLIT",
+ COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);
+ PRINT_PSEUDO_DIRECTIVE_COMMENT(
+ "ENABLE_DIDT_THROTTLE",
+ COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);
} else {
CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX120_RESERVED4,
"COMPUTE_PGM_RSRC3",
"must be zero on gfx10+");
}
- // Bits [17-30].
+ // Bits [22-30].
CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED5,
"COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
@@ -2442,7 +2463,7 @@ Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
} else {
- CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED5,
+ CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED6,
"COMPUTE_PGM_RSRC3",
"must be zero on gfx10");
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index b58ba947c72e2..0bbab29dbda18 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -396,9 +396,17 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
EmitMCExpr(KD.kernarg_size);
OS << '\n';
- PrintField(
- KD.compute_pgm_rsrc2, amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
- amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, ".amdhsa_user_sgpr_count");
+ if (isGFX1250(STI)) {
+ PrintField(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT,
+ ".amdhsa_user_sgpr_count");
+ } else {
+ PrintField(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/155008
More information about the llvm-commits mailing list