[llvm] 5160447 - [AMDGPU] Add gfx10 assembler directive to specify shared VGPR count
Jacob Lambert via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 7 14:35:11 PST 2022
Author: Jacob Lambert
Date: 2022-03-07T14:27:41-08:00
New Revision: 5160447f5844bdb933fa321dc39fd2b9f783daad
URL: https://github.com/llvm/llvm-project/commit/5160447f5844bdb933fa321dc39fd2b9f783daad
DIFF: https://github.com/llvm/llvm-project/commit/5160447f5844bdb933fa321dc39fd2b9f783daad.diff
LOG: [AMDGPU] Add gfx10 assembler directive to specify shared VGPR count
Reviewed By: rampitec
Differential Revision: https://reviews.llvm.org/D105507
Added:
Modified:
llvm/docs/AMDGPUUsage.rst
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
llvm/test/MC/AMDGPU/hsa-diag-v3.s
llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
Removed:
################################################################################
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 01056270046d5..b3ad9c98b3098 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -4451,8 +4451,10 @@ The fields used by CP for code objects before V3 also match those specified in
======= ======= =============================== ===========================================================================
Bits Size Field Name Description
======= ======= =============================== ===========================================================================
- 3:0 4 bits SHARED_VGPR_COUNT Number of shared VGPRs for wavefront size 64. Granularity 8. Value 0-120.
- compute_pgm_rsrc1.vgprs + shared_vgpr_cnt cannot exceed 64.
+ 3:0 4 bits SHARED_VGPR_COUNT Number of shared VGPR blocks when executing in subvector mode. For
+ wavefront size 64 the value is 0-15, representing 0-120 VGPRs (granularity
+ of 8), such that (compute_pgm_rsrc1.vgprs + 1) * 4 + shared_vgpr_count * 8 does
+ not exceed 256. For wavefront size 32 shared_vgpr_count must be 0.
31:4 28 Reserved, must be 0.
bits
32 **Total size 4 bytes.**
@@ -12372,6 +12374,8 @@ terminated by an ``.end_amdhsa_kernel`` directive.
:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`.
``.amdhsa_forward_progress`` 0 GFX10 Controls FWD_PROGRESS in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`.
+ ``.amdhsa_shared_vgpr_count`` 0 GFX10 Controls SHARED_VGPR_COUNT in
+ :ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx10-table`.
``.amdhsa_exception_fp_ieee_invalid_op`` 0 GFX6-GFX10 Controls ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`.
``.amdhsa_exception_fp_denorm_src`` 0 GFX6-GFX10 Controls ENABLE_EXCEPTION_FP_DENORMAL_SOURCE in
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index c3ac000eb300d..d188f4e3ab3a2 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -4645,6 +4645,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
SMRange VGPRRange;
uint64_t NextFreeVGPR = 0;
uint64_t AccumOffset = 0;
+ uint64_t SharedVGPRCount = 0;
SMRange SGPRRange;
uint64_t NextFreeSGPR = 0;
@@ -4872,6 +4873,13 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
ValRange);
+ } else if (ID == ".amdhsa_shared_vgpr_count") {
+ if (IVersion.Major < 10)
+ return Error(IDRange.Start, "directive requires gfx10+", IDRange);
+ SharedVGPRCount = Val;
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
+ COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val,
+ ValRange);
} else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
PARSE_BITS_ENTRY(
KD.compute_pgm_rsrc2,
@@ -4961,6 +4969,19 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
(AccumOffset / 4 - 1));
}
+ if (IVersion.Major == 10) {
+ // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
+ if (SharedVGPRCount && EnableWavefrontSize32) {
+ return TokError("shared_vgpr_count directive not valid on "
+ "wavefront size 32");
+ }
+ if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
+ return TokError("shared_vgpr_count*2 + "
+ "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
+ "exceed 63\n");
+ }
+ }
+
getTargetStreamer().EmitAmdhsaKernelDescriptor(
getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
ReserveFlatScr);
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index d7cb27c32a8c8..88ec2e3f5fea0 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -447,6 +447,8 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS);
+ PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT);
}
PRINT_FIELD(
OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
diff --git a/llvm/test/MC/AMDGPU/hsa-diag-v3.s b/llvm/test/MC/AMDGPU/hsa-diag-v3.s
index ee4543c9fb50b..aef2ecadb1673 100644
--- a/llvm/test/MC/AMDGPU/hsa-diag-v3.s
+++ b/llvm/test/MC/AMDGPU/hsa-diag-v3.s
@@ -225,6 +225,49 @@
.amdhsa_forward_progress 5
.end_amdhsa_kernel
+// GCN-LABEL: warning: test_amdhsa_shared_vgpr_count_invalid1
+// NONGFX10: error: directive requires gfx10+
+// GFX10: error: .amdhsa_next_free_vgpr directive is required
+// NONAMDHSA: error: unknown directive
+.warning "test_amdhsa_shared_vgpr_count_invalid1"
+.amdhsa_kernel test_amdhsa_shared_vgpr_count_invalid1
+ .amdhsa_shared_vgpr_count 8
+.end_amdhsa_kernel
+
+// GCN-LABEL: warning: test_amdhsa_shared_vgpr_count_invalid2
+// NONGFX10: error: directive requires gfx10+
+// GFX10: error: shared_vgpr_count directive not valid on wavefront size 32
+// NONAMDHSA: error: unknown directive
+.warning "test_amdhsa_shared_vgpr_count_invalid2"
+.amdhsa_kernel test_amdhsa_shared_vgpr_count_invalid2
+ .amdhsa_next_free_vgpr 16
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_shared_vgpr_count 8
+ .amdhsa_wavefront_size32 1
+.end_amdhsa_kernel
+
+// GCN-LABEL: warning: test_amdhsa_shared_vgpr_count_invalid3
+// NONGFX10: error: directive requires gfx10+
+// GFX10: error: value out of range
+// NONAMDHSA: error: unknown directive
+.warning "test_amdhsa_shared_vgpr_count_invalid3"
+.amdhsa_kernel test_amdhsa_shared_vgpr_count_invalid3
+ .amdhsa_next_free_vgpr 32
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_shared_vgpr_count 16
+.end_amdhsa_kernel
+
+// GCN-LABEL: warning: test_amdhsa_shared_vgpr_count_invalid4
+// NONGFX10: error: directive requires gfx10+
+// GFX10: error: shared_vgpr_count*2 + compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot exceed 63
+// NONAMDHSA: error: unknown directive
+.warning "test_amdhsa_shared_vgpr_count_invalid4"
+.amdhsa_kernel test_amdhsa_shared_vgpr_count_invalid4
+ .amdhsa_next_free_vgpr 273
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_shared_vgpr_count 15
+.end_amdhsa_kernel
+
// GCN-LABEL: warning: test_next_free_vgpr_invalid
// AMDHSA: error: .amdgcn.next_free_{v,s}gpr symbols must be absolute expressions
// NONAMDHSA-NOT: error:
diff --git a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
index 690a86acbccc3..ba60000837cdc 100644
--- a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
+++ b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
@@ -68,11 +68,13 @@ special_sgpr:
.amdhsa_kernel minimal
.amdhsa_next_free_vgpr 0
.amdhsa_next_free_sgpr 0
+ .amdhsa_shared_vgpr_count 0
.end_amdhsa_kernel
// ASM: .amdhsa_kernel minimal
// ASM: .amdhsa_next_free_vgpr 0
// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM: .amdhsa_shared_vgpr_count 0
// ASM: .end_amdhsa_kernel
// Test that we can specify all available directives with non-default values.
@@ -153,6 +155,7 @@ special_sgpr:
// ASM-NEXT: .amdhsa_workgroup_processor_mode 1
// ASM-NEXT: .amdhsa_memory_ordered 1
// ASM-NEXT: .amdhsa_forward_progress 1
+// ASM-NEXT: .amdhsa_shared_vgpr_count 0
// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
More information about the llvm-commits
mailing list