[llvm] 5160447 - [AMDGPU] Add gfx10 assembler directive to specify shared VGPR count

Jacob Lambert via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 7 14:35:11 PST 2022


Author: Jacob Lambert
Date: 2022-03-07T14:27:41-08:00
New Revision: 5160447f5844bdb933fa321dc39fd2b9f783daad

URL: https://github.com/llvm/llvm-project/commit/5160447f5844bdb933fa321dc39fd2b9f783daad
DIFF: https://github.com/llvm/llvm-project/commit/5160447f5844bdb933fa321dc39fd2b9f783daad.diff

LOG: [AMDGPU] Add gfx10 assembler directive to specify shared VGPR count

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D105507

Added: 
    

Modified: 
    llvm/docs/AMDGPUUsage.rst
    llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
    llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
    llvm/test/MC/AMDGPU/hsa-diag-v3.s
    llvm/test/MC/AMDGPU/hsa-gfx10-v3.s

Removed: 
    


################################################################################
diff  --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 01056270046d5..b3ad9c98b3098 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -4451,8 +4451,10 @@ The fields used by CP for code objects before V3 also match those specified in
      ======= ======= =============================== ===========================================================================
      Bits    Size    Field Name                      Description
      ======= ======= =============================== ===========================================================================
-     3:0     4 bits  SHARED_VGPR_COUNT               Number of shared VGPRs for wavefront size 64. Granularity 8. Value 0-120.
-                                                     compute_pgm_rsrc1.vgprs + shared_vgpr_cnt cannot exceed 64.
+     3:0     4 bits  SHARED_VGPR_COUNT               Number of shared VGPR blocks when executing in subvector mode. For
+                                                     wavefront size 64 the value is 0-15, representing 0-120 VGPRs (granularity
+                                                     of 8), such that (compute_pgm_rsrc1.vgprs + 1)*4 + shared_vgpr_count*8 does
+                                                     not exceed 256. For wavefront size 32 shared_vgpr_count must be 0.
      31:4    28                                      Reserved, must be 0.
              bits
      32      **Total size 4 bytes.**
@@ -12372,6 +12374,8 @@ terminated by an ``.end_amdhsa_kernel`` directive.
                                                                                                :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`.
      ``.amdhsa_forward_progress``                             0                   GFX10        Controls FWD_PROGRESS in
                                                                                                :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`.
+     ``.amdhsa_shared_vgpr_count``                            0                   GFX10        Controls SHARED_VGPR_COUNT in
+                                                                                               :ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx10-table`.
      ``.amdhsa_exception_fp_ieee_invalid_op``                 0                   GFX6-GFX10   Controls ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION in
                                                                                                :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`.
      ``.amdhsa_exception_fp_denorm_src``                      0                   GFX6-GFX10   Controls ENABLE_EXCEPTION_FP_DENORMAL_SOURCE in

diff  --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index c3ac000eb300d..d188f4e3ab3a2 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -4645,6 +4645,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
   SMRange VGPRRange;
   uint64_t NextFreeVGPR = 0;
   uint64_t AccumOffset = 0;
+  uint64_t SharedVGPRCount = 0;
   SMRange SGPRRange;
   uint64_t NextFreeSGPR = 0;
 
@@ -4872,6 +4873,13 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
                        ValRange);
+    } else if (ID == ".amdhsa_shared_vgpr_count") {
+      if (IVersion.Major < 10)
+        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
+      SharedVGPRCount = Val;
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
+                       COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val,
+                       ValRange);
     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
       PARSE_BITS_ENTRY(
           KD.compute_pgm_rsrc2,
@@ -4961,6 +4969,19 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
                     (AccumOffset / 4 - 1));
   }
 
+  if (IVersion.Major == 10) {
+    // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
+    if (SharedVGPRCount && EnableWavefrontSize32) {
+      return TokError("shared_vgpr_count directive not valid on "
+                      "wavefront size 32");
+    }
+    if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
+      return TokError("shared_vgpr_count*2 + "
+                      "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
+                      "exceed 63\n");
+    }
+  }
+
   getTargetStreamer().EmitAmdhsaKernelDescriptor(
       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
       ReserveFlatScr);

diff  --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index d7cb27c32a8c8..88ec2e3f5fea0 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -447,6 +447,8 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
     PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
                 compute_pgm_rsrc1,
                 amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS);
+    PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3,
+                amdhsa::COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT);
   }
   PRINT_FIELD(
       OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,

diff  --git a/llvm/test/MC/AMDGPU/hsa-diag-v3.s b/llvm/test/MC/AMDGPU/hsa-diag-v3.s
index ee4543c9fb50b..aef2ecadb1673 100644
--- a/llvm/test/MC/AMDGPU/hsa-diag-v3.s
+++ b/llvm/test/MC/AMDGPU/hsa-diag-v3.s
@@ -225,6 +225,49 @@
   .amdhsa_forward_progress 5
 .end_amdhsa_kernel
 
+// GCN-LABEL: warning: test_amdhsa_shared_vgpr_count_invalid1
+// NONGFX10: error: directive requires gfx10+
+// GFX10: error: .amdhsa_next_free_vgpr directive is required
+// NONAMDHSA: error: unknown directive
+.warning "test_amdhsa_shared_vgpr_count_invalid1"
+.amdhsa_kernel test_amdhsa_shared_vgpr_count_invalid1
+  .amdhsa_shared_vgpr_count 8
+.end_amdhsa_kernel
+
+// GCN-LABEL: warning: test_amdhsa_shared_vgpr_count_invalid2
+// NONGFX10: error: directive requires gfx10+
+// GFX10: error: shared_vgpr_count directive not valid on wavefront size 32
+// NONAMDHSA: error: unknown directive
+.warning "test_amdhsa_shared_vgpr_count_invalid2"
+.amdhsa_kernel test_amdhsa_shared_vgpr_count_invalid2
+  .amdhsa_next_free_vgpr 16
+  .amdhsa_next_free_sgpr 0
+  .amdhsa_shared_vgpr_count 8
+  .amdhsa_wavefront_size32 1
+.end_amdhsa_kernel
+
+// GCN-LABEL: warning: test_amdhsa_shared_vgpr_count_invalid3
+// NONGFX10: error: directive requires gfx10+
+// GFX10: error: value out of range
+// NONAMDHSA: error: unknown directive
+.warning "test_amdhsa_shared_vgpr_count_invalid3"
+.amdhsa_kernel test_amdhsa_shared_vgpr_count_invalid3
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 0
+  .amdhsa_shared_vgpr_count 16
+.end_amdhsa_kernel
+
+// GCN-LABEL: warning: test_amdhsa_shared_vgpr_count_invalid4
+// NONGFX10: error: directive requires gfx10+
+// GFX10: error: shared_vgpr_count*2 + compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot exceed 63
+// NONAMDHSA: error: unknown directive
+.warning "test_amdhsa_shared_vgpr_count_invalid4"
+.amdhsa_kernel test_amdhsa_shared_vgpr_count_invalid4
+  .amdhsa_next_free_vgpr 273
+  .amdhsa_next_free_sgpr 0
+  .amdhsa_shared_vgpr_count 15
+.end_amdhsa_kernel
+
 // GCN-LABEL: warning: test_next_free_vgpr_invalid
 // AMDHSA: error: .amdgcn.next_free_{v,s}gpr symbols must be absolute expressions
 // NONAMDHSA-NOT: error:

diff  --git a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
index 690a86acbccc3..ba60000837cdc 100644
--- a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
+++ b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
@@ -68,11 +68,13 @@ special_sgpr:
 .amdhsa_kernel minimal
   .amdhsa_next_free_vgpr 0
   .amdhsa_next_free_sgpr 0
+  .amdhsa_shared_vgpr_count 0
 .end_amdhsa_kernel
 
 // ASM: .amdhsa_kernel minimal
 // ASM: .amdhsa_next_free_vgpr 0
 // ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM: .amdhsa_shared_vgpr_count 0
 // ASM: .end_amdhsa_kernel
 
 // Test that we can specify all available directives with non-default values.
@@ -153,6 +155,7 @@ special_sgpr:
 // ASM-NEXT: .amdhsa_workgroup_processor_mode 1
 // ASM-NEXT: .amdhsa_memory_ordered 1
 // ASM-NEXT: .amdhsa_forward_progress 1
+// ASM-NEXT: .amdhsa_shared_vgpr_count 0
 // ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
 // ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
 // ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1


        


More information about the llvm-commits mailing list