[llvm] e6564f3 - AMDGPU: Emit user sgpr count directives in text asm

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 26 10:51:26 PST 2022


Author: Matt Arsenault
Date: 2022-01-26T13:51:12-05:00
New Revision: e6564f39c787d6aa06ad5573e55db3439ed3a263

URL: https://github.com/llvm/llvm-project/commit/e6564f39c787d6aa06ad5573e55db3439ed3a263
DIFF: https://github.com/llvm/llvm-project/commit/e6564f39c787d6aa06ad5573e55db3439ed3a263.diff

LOG: AMDGPU: Emit user sgpr count directives in text asm

We were emitting these in the object file but not printing them.

Added: 
    llvm/test/MC/AMDGPU/user-sgpr-count-diag.s
    llvm/test/MC/AMDGPU/user-sgpr-count.s

Modified: 
    llvm/docs/AMDGPUUsage.rst
    llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
    llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
    llvm/test/CodeGen/AMDGPU/code-object-v3.ll
    llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
    llvm/test/CodeGen/AMDGPU/kernarg-size.ll
    llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
    llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
    llvm/test/MC/AMDGPU/hsa-v3.s
    llvm/test/MC/AMDGPU/hsa-v4.s

Removed: 
    


################################################################################
diff  --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index f592660f4965e..5f0abbec54fb5 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -4114,9 +4114,10 @@ The fields used by CP for code objects before V3 also match those specified in
                                                      Used by CP to set up
                                                      ``COMPUTE_PGM_RSRC2.SCRATCH_EN``.
      5:1     5 bits  USER_SGPR_COUNT                 The total number of SGPR
-                                                     user data registers
-                                                     requested. This number must
-                                                     match the number of user
+                                                     user data
+                                                     registers requested. This
+                                                     number must be greater than
+                                                     or equal to the number of user
                                                      data registers enabled.
 
                                                      Used by CP to set up
@@ -12107,6 +12108,8 @@ terminated by an ``.end_amdhsa_kernel`` directive.
                                                                                                :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
      ``.amdhsa_kernarg_size``                                 0                   GFX6-GFX10   Controls KERNARG_SIZE in
                                                                                                :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
+     ``.amdhsa_user_sgpr_count``                              0                   GFX6-GFX10   Controls USER_SGPR_COUNT in COMPUTE_PGM_RSRC2
+                                                                                               :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`
      ``.amdhsa_user_sgpr_private_segment_buffer``             0                   GFX6-GFX10   Controls ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER in
                                                                                                :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
      ``.amdhsa_user_sgpr_dispatch_ptr``                       0                   GFX6-GFX10   Controls ENABLE_SGPR_DISPATCH_PTR in

diff  --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index c71205b17a1a5..f6c54fe3e7bf9 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -4568,7 +4568,13 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
   uint64_t AccumOffset = 0;
   SMRange SGPRRange;
   uint64_t NextFreeSGPR = 0;
-  unsigned UserSGPRCount = 0;
+
+  // Count the number of user SGPRs implied from the enabled feature bits.
+  unsigned ImpliedUserSGPRCount = 0;
+
+  // Track if the asm explicitly contains the directive for the user SGPR
+  // count.
+  Optional<unsigned> ExplicitUserSGPRCount;
   bool ReserveVCC = true;
   bool ReserveFlatScr = true;
   Optional<bool> EnableWavefrontSize32;
@@ -4617,6 +4623,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
         return OutOfRangeError(ValRange);
       KD.kernarg_size = Val;
+    } else if (ID == ".amdhsa_user_sgpr_count") {
+      ExplicitUserSGPRCount = Val;
     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
       if (hasArchitectedFlatScratch())
         return Error(IDRange.Start,
@@ -4626,31 +4634,31 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
                        Val, ValRange);
       if (Val)
-        UserSGPRCount += 4;
+        ImpliedUserSGPRCount += 4;
     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
                        ValRange);
       if (Val)
-        UserSGPRCount += 2;
+        ImpliedUserSGPRCount += 2;
     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
                        ValRange);
       if (Val)
-        UserSGPRCount += 2;
+        ImpliedUserSGPRCount += 2;
     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
                        Val, ValRange);
       if (Val)
-        UserSGPRCount += 2;
+        ImpliedUserSGPRCount += 2;
     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
                        ValRange);
       if (Val)
-        UserSGPRCount += 2;
+        ImpliedUserSGPRCount += 2;
     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
       if (hasArchitectedFlatScratch())
         return Error(IDRange.Start,
@@ -4660,13 +4668,13 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
                        ValRange);
       if (Val)
-        UserSGPRCount += 2;
+        ImpliedUserSGPRCount += 2;
     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
                        Val, ValRange);
       if (Val)
-        UserSGPRCount += 1;
+        ImpliedUserSGPRCount += 1;
     } else if (ID == ".amdhsa_wavefront_size32") {
       if (IVersion.Major < 10)
         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
@@ -4850,6 +4858,13 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                   SGPRBlocks);
 
+  if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
+    return TokError("amdgpu_user_sgpr_count smaller than than implied by "
+                    "enabled user SGPRs");
+
+  unsigned UserSGPRCount =
+      ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
+
   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
     return TokError("too many user SGPRs enabled");
   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,

diff  --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 9a9a2c973f448..9578bdb0bad07 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -319,6 +319,10 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
      << KD.private_segment_fixed_size << '\n';
   OS << "\t\t.amdhsa_kernarg_size " << KD.kernarg_size << '\n';
 
+  PRINT_FIELD(OS, ".amdhsa_user_sgpr_count", KD,
+              compute_pgm_rsrc2,
+              amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT);
+
   if (!hasArchitectedFlatScratch(STI))
     PRINT_FIELD(
         OS, ".amdhsa_user_sgpr_private_segment_buffer", KD,

diff  --git a/llvm/test/CodeGen/AMDGPU/code-object-v3.ll b/llvm/test/CodeGen/AMDGPU/code-object-v3.ll
index 8389bbd1bfb56..2bc34cedd1e03 100644
--- a/llvm/test/CodeGen/AMDGPU/code-object-v3.ll
+++ b/llvm/test/CodeGen/AMDGPU/code-object-v3.ll
@@ -12,6 +12,7 @@
 ; OSABI-AMDHSA-ASM: .section .rodata,#alloc
 ; OSABI-AMDHSA-ASM: .p2align 6
 ; OSABI-AMDHSA-ASM: .amdhsa_kernel fadd
+; OSABI-AMDHSA-ASM:     .amdhsa_user_sgpr_count 6
 ; OSABI-AMDHSA-ASM:     .amdhsa_user_sgpr_private_segment_buffer 1
 ; OSABI-AMDHSA-ASM:     .amdhsa_user_sgpr_kernarg_segment_ptr 1
 ; OSABI-AMDHSA-ASM:     .amdhsa_next_free_vgpr 3
@@ -30,6 +31,7 @@
 ; OSABI-AMDHSA-ASM: .section .rodata,#alloc
 ; OSABI-AMDHSA-ASM: .p2align 6
 ; OSABI-AMDHSA-ASM: .amdhsa_kernel fsub
+; OSABI-AMDHSA-ASM:     .amdhsa_user_sgpr_count 6
 ; OSABI-AMDHSA-ASM:     .amdhsa_user_sgpr_private_segment_buffer 1
 ; OSABI-AMDHSA-ASM:     .amdhsa_user_sgpr_kernarg_segment_ptr 1
 ; OSABI-AMDHSA-ASM:     .amdhsa_next_free_vgpr 3

diff  --git a/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll b/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
index 2afce5352b60e..aebbe33fd236c 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
@@ -57,6 +57,7 @@ define amdgpu_kernel void @indirect_call_known_no_special_inputs() {
 ; CHECK-NEXT:    s_endpgm
 
 ; CHECK: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_user_sgpr_count 6
 ; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
 ; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
 ; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0

diff  --git a/llvm/test/CodeGen/AMDGPU/kernarg-size.ll b/llvm/test/CodeGen/AMDGPU/kernarg-size.ll
index ff409b938319b..d9fdfb0d101af 100644
--- a/llvm/test/CodeGen/AMDGPU/kernarg-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernarg-size.ll
@@ -9,6 +9,7 @@ declare void @llvm.debugtrap() #1
 ; HSA-NEXT:     .amdhsa_group_segment_fixed_size 0
 ; HSA-NEXT:     .amdhsa_private_segment_fixed_size 0
 ; HSA-NEXT:     .amdhsa_kernarg_size 8
+; HSA-NEXT:     .amdhsa_user_sgpr_count 8
 ; HSA-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
 ; HSA:      .end_amdhsa_kernel
 

diff  --git a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
index 8e3e7776fb95e..7cc2b8214a36d 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
@@ -18,6 +18,7 @@ define amdgpu_kernel void @max_alignment_128() #0 {
 ; VI-NEXT:     .amdhsa_group_segment_fixed_size 0
 ; VI-NEXT:     .amdhsa_private_segment_fixed_size 256
 ; VI-NEXT:     .amdhsa_kernarg_size 0
+; VI-NEXT:     .amdhsa_user_sgpr_count 6
 ; VI-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
 ; VI-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
 ; VI-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
@@ -65,6 +66,7 @@ define amdgpu_kernel void @max_alignment_128() #0 {
 ; GFX9-NEXT:     .amdhsa_group_segment_fixed_size 0
 ; GFX9-NEXT:     .amdhsa_private_segment_fixed_size 256
 ; GFX9-NEXT:     .amdhsa_kernarg_size 0
+; GFX9-NEXT:     .amdhsa_user_sgpr_count 6
 ; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
 ; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
 ; GFX9-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
@@ -119,6 +121,7 @@ define amdgpu_kernel void @stackrealign_attr() #1 {
 ; VI-NEXT:     .amdhsa_group_segment_fixed_size 0
 ; VI-NEXT:     .amdhsa_private_segment_fixed_size 8
 ; VI-NEXT:     .amdhsa_kernarg_size 0
+; VI-NEXT:     .amdhsa_user_sgpr_count 6
 ; VI-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
 ; VI-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
 ; VI-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
@@ -166,6 +169,7 @@ define amdgpu_kernel void @stackrealign_attr() #1 {
 ; GFX9-NEXT:     .amdhsa_group_segment_fixed_size 0
 ; GFX9-NEXT:     .amdhsa_private_segment_fixed_size 8
 ; GFX9-NEXT:     .amdhsa_kernarg_size 0
+; GFX9-NEXT:     .amdhsa_user_sgpr_count 6
 ; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
 ; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
 ; GFX9-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
@@ -220,6 +224,7 @@ define amdgpu_kernel void @alignstack_attr() #2 {
 ; VI-NEXT:     .amdhsa_group_segment_fixed_size 0
 ; VI-NEXT:     .amdhsa_private_segment_fixed_size 128
 ; VI-NEXT:     .amdhsa_kernarg_size 0
+; VI-NEXT:     .amdhsa_user_sgpr_count 6
 ; VI-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
 ; VI-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
 ; VI-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
@@ -267,6 +272,7 @@ define amdgpu_kernel void @alignstack_attr() #2 {
 ; GFX9-NEXT:     .amdhsa_group_segment_fixed_size 0
 ; GFX9-NEXT:     .amdhsa_private_segment_fixed_size 128
 ; GFX9-NEXT:     .amdhsa_kernarg_size 0
+; GFX9-NEXT:     .amdhsa_user_sgpr_count 6
 ; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
 ; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
 ; GFX9-NEXT:     .amdhsa_user_sgpr_queue_ptr 0

diff  --git a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
index b7a7ad824851e..690a86acbccc3 100644
--- a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
+++ b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
@@ -123,6 +123,7 @@ special_sgpr:
 // ASM-NEXT: .amdhsa_group_segment_fixed_size 1
 // ASM-NEXT: .amdhsa_private_segment_fixed_size 1
 // ASM-NEXT: .amdhsa_kernarg_size 8
+// ASM-NEXT: .amdhsa_user_sgpr_count 15
 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
 // ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
 // ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1

diff  --git a/llvm/test/MC/AMDGPU/hsa-v3.s b/llvm/test/MC/AMDGPU/hsa-v3.s
index 76b82d4ae3321..9f854986d7bc4 100644
--- a/llvm/test/MC/AMDGPU/hsa-v3.s
+++ b/llvm/test/MC/AMDGPU/hsa-v3.s
@@ -132,6 +132,7 @@ disabled_user_sgpr:
 // ASM-NEXT: .amdhsa_group_segment_fixed_size 1
 // ASM-NEXT: .amdhsa_private_segment_fixed_size 1
 // ASM-NEXT: .amdhsa_kernarg_size 8
+// ASM-NEXT: .amdhsa_user_sgpr_count 15
 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
 // ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
 // ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
@@ -276,7 +277,7 @@ v_mov_b32_e32 v16, s3
 .end_amdgpu_metadata
 
 // ASM:      	.amdgpu_metadata
-// ASM:      amdhsa.kernels:  
+// ASM:      amdhsa.kernels:
 // ASM:        - .group_segment_fixed_size: 16
 // ASM:          .kernarg_segment_align: 64
 // ASM:          .kernarg_segment_size: 8
@@ -297,7 +298,7 @@ v_mov_b32_e32 v16, s3
 // ASM:          .symbol:         'amd_kernel_code_t_minimal at kd'
 // ASM:          .vgpr_count:     40
 // ASM:          .wavefront_size: 128
-// ASM:      amdhsa.version:  
+// ASM:      amdhsa.version:
 // ASM-NEXT:   - 3
 // ASM-NEXT:   - 0
 // ASM:      	.end_amdgpu_metadata

diff  --git a/llvm/test/MC/AMDGPU/hsa-v4.s b/llvm/test/MC/AMDGPU/hsa-v4.s
index 61dbf75c3fa93..6a824b8bcc7b9 100644
--- a/llvm/test/MC/AMDGPU/hsa-v4.s
+++ b/llvm/test/MC/AMDGPU/hsa-v4.s
@@ -94,6 +94,7 @@ disabled_user_sgpr:
   .amdhsa_group_segment_fixed_size 1
   .amdhsa_private_segment_fixed_size 1
   .amdhsa_kernarg_size 8
+  .amdhsa_user_sgpr_count 15
   .amdhsa_user_sgpr_private_segment_buffer 1
   .amdhsa_user_sgpr_dispatch_ptr 1
   .amdhsa_user_sgpr_queue_ptr 1
@@ -132,6 +133,7 @@ disabled_user_sgpr:
 // ASM-NEXT: .amdhsa_group_segment_fixed_size 1
 // ASM-NEXT: .amdhsa_private_segment_fixed_size 1
 // ASM-NEXT: .amdhsa_kernarg_size 8
+// ASM-NEXT: .amdhsa_user_sgpr_count 15
 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
 // ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
 // ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
@@ -276,7 +278,7 @@ v_mov_b32_e32 v16, s3
 .end_amdgpu_metadata
 
 // ASM:      	.amdgpu_metadata
-// ASM:      amdhsa.kernels:  
+// ASM:      amdhsa.kernels:
 // ASM:        - .group_segment_fixed_size: 16
 // ASM:          .kernarg_segment_align: 64
 // ASM:          .kernarg_segment_size: 8
@@ -297,7 +299,7 @@ v_mov_b32_e32 v16, s3
 // ASM:          .symbol:         'amd_kernel_code_t_minimal at kd'
 // ASM:          .vgpr_count:     40
 // ASM:          .wavefront_size: 128
-// ASM:      amdhsa.version:  
+// ASM:      amdhsa.version:
 // ASM-NEXT:   - 3
 // ASM-NEXT:   - 0
 // ASM:      	.end_amdgpu_metadata

diff  --git a/llvm/test/MC/AMDGPU/user-sgpr-count-diag.s b/llvm/test/MC/AMDGPU/user-sgpr-count-diag.s
new file mode 100644
index 0000000000000..edaeafae22efd
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/user-sgpr-count-diag.s
@@ -0,0 +1,17 @@
+// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx810 %s 2>&1 >/dev/null | FileCheck -check-prefix=ERR %s
+
+.amdhsa_kernel implied_count_too_low_0
+  .amdhsa_user_sgpr_count 0
+  .amdhsa_user_sgpr_queue_ptr 1
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 32
+// ERR: [[@LINE+1]]:19: error: amdgpu_user_sgpr_count smaller than than implied by enabled user SGPRs
+.end_amdhsa_kernel
+
+.amdhsa_kernel implied_count_too_low_1
+  .amdhsa_user_sgpr_count 1
+  .amdhsa_user_sgpr_queue_ptr 1
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 32
+// ERR: [[@LINE+1]]:19: error: amdgpu_user_sgpr_count smaller than than implied by enabled user SGPRs
+.end_amdhsa_kernel

diff  --git a/llvm/test/MC/AMDGPU/user-sgpr-count.s b/llvm/test/MC/AMDGPU/user-sgpr-count.s
new file mode 100644
index 0000000000000..ab363f91d334b
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/user-sgpr-count.s
@@ -0,0 +1,87 @@
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=3 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
+
+.text
+// ASM: .text
+
+.amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack"
+// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack"
+
+
+// ASM-LABEL: .amdhsa_kernel user_sgprs_implied_count
+// ASM: .amdhsa_user_sgpr_count 15
+.amdhsa_kernel user_sgprs_implied_count_all
+  .amdhsa_user_sgpr_private_segment_buffer 1
+  .amdhsa_user_sgpr_dispatch_ptr 1
+  .amdhsa_user_sgpr_queue_ptr 1
+  .amdhsa_user_sgpr_kernarg_segment_ptr 1
+  .amdhsa_user_sgpr_dispatch_id 1
+  .amdhsa_user_sgpr_flat_scratch_init 1
+  .amdhsa_user_sgpr_private_segment_size 1
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 32
+.end_amdhsa_kernel
+
+// ASM-LABEL: .amdhsa_kernel user_sgprs_implied_count_0
+// ASM: .amdhsa_user_sgpr_count 7
+.amdhsa_kernel user_sgprs_implied_count_0
+  .amdhsa_user_sgpr_queue_ptr 1
+  .amdhsa_user_sgpr_kernarg_segment_ptr 1
+  .amdhsa_user_sgpr_flat_scratch_init 1
+  .amdhsa_user_sgpr_private_segment_size 1
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 32
+.end_amdhsa_kernel
+
+// ASM-LABEL: .amdhsa_kernel user_sgprs_implied_count_1
+// ASM: .amdhsa_user_sgpr_count 9
+.amdhsa_kernel user_sgprs_implied_count_1
+  .amdhsa_user_sgpr_private_segment_buffer 1
+  .amdhsa_user_sgpr_queue_ptr 1
+  .amdhsa_user_sgpr_kernarg_segment_ptr 1
+  .amdhsa_user_sgpr_private_segment_size 1
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 32
+.end_amdhsa_kernel
+
+
+// ASM-LABEL: .amdhsa_kernel user_sgprs_implied_count_private_segment_buffer
+// ASM: .amdhsa_user_sgpr_count 4
+  .amdhsa_kernel user_sgprs_implied_count_private_segment_buffer
+  .amdhsa_user_sgpr_private_segment_buffer 1
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 32
+.end_amdhsa_kernel
+
+
+// ASM-LABEL: .amdhsa_kernel explicit_user_sgpr_count_16
+.amdhsa_kernel explicit_user_sgpr_count_16
+  .amdhsa_user_sgpr_count 16
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 32
+.end_amdhsa_kernel
+
+
+// ASM-LABEL: .amdhsa_kernel explicit_user_sgpr_count_0
+// ASM: .amdhsa_user_sgpr_count 0
+  .amdhsa_kernel explicit_user_sgpr_count_0
+  .amdhsa_user_sgpr_count 0
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 32
+.end_amdhsa_kernel
+
+// ASM-LABEL: .amdhsa_kernel explicit_user_sgpr_count_1
+// ASM: .amdhsa_user_sgpr_count 1
+.amdhsa_kernel explicit_user_sgpr_count_1
+  .amdhsa_user_sgpr_count 1
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 32
+.end_amdhsa_kernel
+
+.amdhsa_kernel explicit_user_sgpr_count_larger_than_implied
+  .amdhsa_user_sgpr_count 12
+  .amdhsa_user_sgpr_private_segment_buffer 1
+  .amdhsa_user_sgpr_queue_ptr 1
+  .amdhsa_user_sgpr_kernarg_segment_ptr 1
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 32
+.end_amdhsa_kernel


        


More information about the llvm-commits mailing list