[llvm] e6564f3 - AMDGPU: Emit user sgpr count directives in text asm
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 26 10:51:26 PST 2022
Author: Matt Arsenault
Date: 2022-01-26T13:51:12-05:00
New Revision: e6564f39c787d6aa06ad5573e55db3439ed3a263
URL: https://github.com/llvm/llvm-project/commit/e6564f39c787d6aa06ad5573e55db3439ed3a263
DIFF: https://github.com/llvm/llvm-project/commit/e6564f39c787d6aa06ad5573e55db3439ed3a263.diff
LOG: AMDGPU: Emit user sgpr count directives in text asm
We were emitting these in the object file but not printing them.
Added:
llvm/test/MC/AMDGPU/user-sgpr-count-diag.s
llvm/test/MC/AMDGPU/user-sgpr-count.s
Modified:
llvm/docs/AMDGPUUsage.rst
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
llvm/test/CodeGen/AMDGPU/code-object-v3.ll
llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
llvm/test/CodeGen/AMDGPU/kernarg-size.ll
llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
llvm/test/MC/AMDGPU/hsa-v3.s
llvm/test/MC/AMDGPU/hsa-v4.s
Removed:
################################################################################
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index f592660f4965e..5f0abbec54fb5 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -4114,9 +4114,10 @@ The fields used by CP for code objects before V3 also match those specified in
Used by CP to set up
``COMPUTE_PGM_RSRC2.SCRATCH_EN``.
5:1 5 bits USER_SGPR_COUNT The total number of SGPR
- user data registers
- requested. This number must
- match the number of user
+ user data
+ registers requested. This
+ number must be greater than
+ or equal to the number of user
data registers enabled.
Used by CP to set up
@@ -12107,6 +12108,8 @@ terminated by an ``.end_amdhsa_kernel`` directive.
:ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
``.amdhsa_kernarg_size`` 0 GFX6-GFX10 Controls KERNARG_SIZE in
:ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
+ ``.amdhsa_user_sgpr_count`` 0 GFX6-GFX10 Controls USER_SGPR_COUNT in COMPUTE_PGM_RSRC2
+ :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`
``.amdhsa_user_sgpr_private_segment_buffer`` 0 GFX6-GFX10 Controls ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER in
:ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
``.amdhsa_user_sgpr_dispatch_ptr`` 0 GFX6-GFX10 Controls ENABLE_SGPR_DISPATCH_PTR in
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index c71205b17a1a5..f6c54fe3e7bf9 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -4568,7 +4568,13 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
uint64_t AccumOffset = 0;
SMRange SGPRRange;
uint64_t NextFreeSGPR = 0;
- unsigned UserSGPRCount = 0;
+
+ // Count the number of user SGPRs implied from the enabled feature bits.
+ unsigned ImpliedUserSGPRCount = 0;
+
+ // Track if the asm explicitly contains the directive for the user SGPR
+ // count.
+ Optional<unsigned> ExplicitUserSGPRCount;
bool ReserveVCC = true;
bool ReserveFlatScr = true;
Optional<bool> EnableWavefrontSize32;
@@ -4617,6 +4623,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
return OutOfRangeError(ValRange);
KD.kernarg_size = Val;
+ } else if (ID == ".amdhsa_user_sgpr_count") {
+ ExplicitUserSGPRCount = Val;
} else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
if (hasArchitectedFlatScratch())
return Error(IDRange.Start,
@@ -4626,31 +4634,31 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
Val, ValRange);
if (Val)
- UserSGPRCount += 4;
+ ImpliedUserSGPRCount += 4;
} else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
ValRange);
if (Val)
- UserSGPRCount += 2;
+ ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
ValRange);
if (Val)
- UserSGPRCount += 2;
+ ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
Val, ValRange);
if (Val)
- UserSGPRCount += 2;
+ ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
ValRange);
if (Val)
- UserSGPRCount += 2;
+ ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
if (hasArchitectedFlatScratch())
return Error(IDRange.Start,
@@ -4660,13 +4668,13 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
ValRange);
if (Val)
- UserSGPRCount += 2;
+ ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
Val, ValRange);
if (Val)
- UserSGPRCount += 1;
+ ImpliedUserSGPRCount += 1;
} else if (ID == ".amdhsa_wavefront_size32") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
@@ -4850,6 +4858,13 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
SGPRBlocks);
+ if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
+ return TokError("amdgpu_user_sgpr_count smaller than than implied by "
+ "enabled user SGPRs");
+
+ unsigned UserSGPRCount =
+ ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
+
if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
return TokError("too many user SGPRs enabled");
AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 9a9a2c973f448..9578bdb0bad07 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -319,6 +319,10 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
<< KD.private_segment_fixed_size << '\n';
OS << "\t\t.amdhsa_kernarg_size " << KD.kernarg_size << '\n';
+ PRINT_FIELD(OS, ".amdhsa_user_sgpr_count", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT);
+
if (!hasArchitectedFlatScratch(STI))
PRINT_FIELD(
OS, ".amdhsa_user_sgpr_private_segment_buffer", KD,
diff --git a/llvm/test/CodeGen/AMDGPU/code-object-v3.ll b/llvm/test/CodeGen/AMDGPU/code-object-v3.ll
index 8389bbd1bfb56..2bc34cedd1e03 100644
--- a/llvm/test/CodeGen/AMDGPU/code-object-v3.ll
+++ b/llvm/test/CodeGen/AMDGPU/code-object-v3.ll
@@ -12,6 +12,7 @@
; OSABI-AMDHSA-ASM: .section .rodata,#alloc
; OSABI-AMDHSA-ASM: .p2align 6
; OSABI-AMDHSA-ASM: .amdhsa_kernel fadd
+; OSABI-AMDHSA-ASM: .amdhsa_user_sgpr_count 6
; OSABI-AMDHSA-ASM: .amdhsa_user_sgpr_private_segment_buffer 1
; OSABI-AMDHSA-ASM: .amdhsa_user_sgpr_kernarg_segment_ptr 1
; OSABI-AMDHSA-ASM: .amdhsa_next_free_vgpr 3
@@ -30,6 +31,7 @@
; OSABI-AMDHSA-ASM: .section .rodata,#alloc
; OSABI-AMDHSA-ASM: .p2align 6
; OSABI-AMDHSA-ASM: .amdhsa_kernel fsub
+; OSABI-AMDHSA-ASM: .amdhsa_user_sgpr_count 6
; OSABI-AMDHSA-ASM: .amdhsa_user_sgpr_private_segment_buffer 1
; OSABI-AMDHSA-ASM: .amdhsa_user_sgpr_kernarg_segment_ptr 1
; OSABI-AMDHSA-ASM: .amdhsa_next_free_vgpr 3
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll b/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
index 2afce5352b60e..aebbe33fd236c 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
@@ -57,6 +57,7 @@ define amdgpu_kernel void @indirect_call_known_no_special_inputs() {
; CHECK-NEXT: s_endpgm
; CHECK: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_user_sgpr_count 6
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
diff --git a/llvm/test/CodeGen/AMDGPU/kernarg-size.ll b/llvm/test/CodeGen/AMDGPU/kernarg-size.ll
index ff409b938319b..d9fdfb0d101af 100644
--- a/llvm/test/CodeGen/AMDGPU/kernarg-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernarg-size.ll
@@ -9,6 +9,7 @@ declare void @llvm.debugtrap() #1
; HSA-NEXT: .amdhsa_group_segment_fixed_size 0
; HSA-NEXT: .amdhsa_private_segment_fixed_size 0
; HSA-NEXT: .amdhsa_kernarg_size 8
+; HSA-NEXT: .amdhsa_user_sgpr_count 8
; HSA-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; HSA: .end_amdhsa_kernel
diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
index 8e3e7776fb95e..7cc2b8214a36d 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
@@ -18,6 +18,7 @@ define amdgpu_kernel void @max_alignment_128() #0 {
; VI-NEXT: .amdhsa_group_segment_fixed_size 0
; VI-NEXT: .amdhsa_private_segment_fixed_size 256
; VI-NEXT: .amdhsa_kernarg_size 0
+; VI-NEXT: .amdhsa_user_sgpr_count 6
; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0
@@ -65,6 +66,7 @@ define amdgpu_kernel void @max_alignment_128() #0 {
; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT: .amdhsa_private_segment_fixed_size 256
; GFX9-NEXT: .amdhsa_kernarg_size 0
+; GFX9-NEXT: .amdhsa_user_sgpr_count 6
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0
@@ -119,6 +121,7 @@ define amdgpu_kernel void @stackrealign_attr() #1 {
; VI-NEXT: .amdhsa_group_segment_fixed_size 0
; VI-NEXT: .amdhsa_private_segment_fixed_size 8
; VI-NEXT: .amdhsa_kernarg_size 0
+; VI-NEXT: .amdhsa_user_sgpr_count 6
; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0
@@ -166,6 +169,7 @@ define amdgpu_kernel void @stackrealign_attr() #1 {
; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT: .amdhsa_private_segment_fixed_size 8
; GFX9-NEXT: .amdhsa_kernarg_size 0
+; GFX9-NEXT: .amdhsa_user_sgpr_count 6
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0
@@ -220,6 +224,7 @@ define amdgpu_kernel void @alignstack_attr() #2 {
; VI-NEXT: .amdhsa_group_segment_fixed_size 0
; VI-NEXT: .amdhsa_private_segment_fixed_size 128
; VI-NEXT: .amdhsa_kernarg_size 0
+; VI-NEXT: .amdhsa_user_sgpr_count 6
; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0
@@ -267,6 +272,7 @@ define amdgpu_kernel void @alignstack_attr() #2 {
; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT: .amdhsa_private_segment_fixed_size 128
; GFX9-NEXT: .amdhsa_kernarg_size 0
+; GFX9-NEXT: .amdhsa_user_sgpr_count 6
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0
diff --git a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
index b7a7ad824851e..690a86acbccc3 100644
--- a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
+++ b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
@@ -123,6 +123,7 @@ special_sgpr:
// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
// ASM-NEXT: .amdhsa_kernarg_size 8
+// ASM-NEXT: .amdhsa_user_sgpr_count 15
// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
diff --git a/llvm/test/MC/AMDGPU/hsa-v3.s b/llvm/test/MC/AMDGPU/hsa-v3.s
index 76b82d4ae3321..9f854986d7bc4 100644
--- a/llvm/test/MC/AMDGPU/hsa-v3.s
+++ b/llvm/test/MC/AMDGPU/hsa-v3.s
@@ -132,6 +132,7 @@ disabled_user_sgpr:
// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
// ASM-NEXT: .amdhsa_kernarg_size 8
+// ASM-NEXT: .amdhsa_user_sgpr_count 15
// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
@@ -276,7 +277,7 @@ v_mov_b32_e32 v16, s3
.end_amdgpu_metadata
// ASM: .amdgpu_metadata
-// ASM: amdhsa.kernels:
+// ASM: amdhsa.kernels:
// ASM: - .group_segment_fixed_size: 16
// ASM: .kernarg_segment_align: 64
// ASM: .kernarg_segment_size: 8
@@ -297,7 +298,7 @@ v_mov_b32_e32 v16, s3
// ASM: .symbol: 'amd_kernel_code_t_minimal at kd'
// ASM: .vgpr_count: 40
// ASM: .wavefront_size: 128
-// ASM: amdhsa.version:
+// ASM: amdhsa.version:
// ASM-NEXT: - 3
// ASM-NEXT: - 0
// ASM: .end_amdgpu_metadata
diff --git a/llvm/test/MC/AMDGPU/hsa-v4.s b/llvm/test/MC/AMDGPU/hsa-v4.s
index 61dbf75c3fa93..6a824b8bcc7b9 100644
--- a/llvm/test/MC/AMDGPU/hsa-v4.s
+++ b/llvm/test/MC/AMDGPU/hsa-v4.s
@@ -94,6 +94,7 @@ disabled_user_sgpr:
.amdhsa_group_segment_fixed_size 1
.amdhsa_private_segment_fixed_size 1
.amdhsa_kernarg_size 8
+ .amdhsa_user_sgpr_count 15
.amdhsa_user_sgpr_private_segment_buffer 1
.amdhsa_user_sgpr_dispatch_ptr 1
.amdhsa_user_sgpr_queue_ptr 1
@@ -132,6 +133,7 @@ disabled_user_sgpr:
// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
// ASM-NEXT: .amdhsa_kernarg_size 8
+// ASM-NEXT: .amdhsa_user_sgpr_count 15
// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
@@ -276,7 +278,7 @@ v_mov_b32_e32 v16, s3
.end_amdgpu_metadata
// ASM: .amdgpu_metadata
-// ASM: amdhsa.kernels:
+// ASM: amdhsa.kernels:
// ASM: - .group_segment_fixed_size: 16
// ASM: .kernarg_segment_align: 64
// ASM: .kernarg_segment_size: 8
@@ -297,7 +299,7 @@ v_mov_b32_e32 v16, s3
// ASM: .symbol: 'amd_kernel_code_t_minimal at kd'
// ASM: .vgpr_count: 40
// ASM: .wavefront_size: 128
-// ASM: amdhsa.version:
+// ASM: amdhsa.version:
// ASM-NEXT: - 3
// ASM-NEXT: - 0
// ASM: .end_amdgpu_metadata
diff --git a/llvm/test/MC/AMDGPU/user-sgpr-count-diag.s b/llvm/test/MC/AMDGPU/user-sgpr-count-diag.s
new file mode 100644
index 0000000000000..edaeafae22efd
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/user-sgpr-count-diag.s
@@ -0,0 +1,17 @@
+// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx810 %s 2>&1 >/dev/null | FileCheck -check-prefix=ERR %s
+
+.amdhsa_kernel implied_count_too_low_0
+ .amdhsa_user_sgpr_count 0
+ .amdhsa_user_sgpr_queue_ptr 1
+ .amdhsa_next_free_vgpr 32
+ .amdhsa_next_free_sgpr 32
+// ERR: [[@LINE+1]]:19: error: amdgpu_user_sgpr_count smaller than than implied by enabled user SGPRs
+.end_amdhsa_kernel
+
+.amdhsa_kernel implied_count_too_low_1
+ .amdhsa_user_sgpr_count 1
+ .amdhsa_user_sgpr_queue_ptr 1
+ .amdhsa_next_free_vgpr 32
+ .amdhsa_next_free_sgpr 32
+// ERR: [[@LINE+1]]:19: error: amdgpu_user_sgpr_count smaller than than implied by enabled user SGPRs
+.end_amdhsa_kernel
diff --git a/llvm/test/MC/AMDGPU/user-sgpr-count.s b/llvm/test/MC/AMDGPU/user-sgpr-count.s
new file mode 100644
index 0000000000000..ab363f91d334b
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/user-sgpr-count.s
@@ -0,0 +1,87 @@
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=3 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
+
+.text
+// ASM: .text
+
+.amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack"
+// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack"
+
+
+// ASM-LABEL: .amdhsa_kernel user_sgprs_implied_count
+// ASM: .amdhsa_user_sgpr_count 15
+.amdhsa_kernel user_sgprs_implied_count_all
+ .amdhsa_user_sgpr_private_segment_buffer 1
+ .amdhsa_user_sgpr_dispatch_ptr 1
+ .amdhsa_user_sgpr_queue_ptr 1
+ .amdhsa_user_sgpr_kernarg_segment_ptr 1
+ .amdhsa_user_sgpr_dispatch_id 1
+ .amdhsa_user_sgpr_flat_scratch_init 1
+ .amdhsa_user_sgpr_private_segment_size 1
+ .amdhsa_next_free_vgpr 32
+ .amdhsa_next_free_sgpr 32
+.end_amdhsa_kernel
+
+// ASM-LABEL: .amdhsa_kernel user_sgprs_implied_count_0
+// ASM: .amdhsa_user_sgpr_count 7
+.amdhsa_kernel user_sgprs_implied_count_0
+ .amdhsa_user_sgpr_queue_ptr 1
+ .amdhsa_user_sgpr_kernarg_segment_ptr 1
+ .amdhsa_user_sgpr_flat_scratch_init 1
+ .amdhsa_user_sgpr_private_segment_size 1
+ .amdhsa_next_free_vgpr 32
+ .amdhsa_next_free_sgpr 32
+.end_amdhsa_kernel
+
+// ASM-LABEL: .amdhsa_kernel user_sgprs_implied_count_1
+// ASM: .amdhsa_user_sgpr_count 9
+.amdhsa_kernel user_sgprs_implied_count_1
+ .amdhsa_user_sgpr_private_segment_buffer 1
+ .amdhsa_user_sgpr_queue_ptr 1
+ .amdhsa_user_sgpr_kernarg_segment_ptr 1
+ .amdhsa_user_sgpr_private_segment_size 1
+ .amdhsa_next_free_vgpr 32
+ .amdhsa_next_free_sgpr 32
+.end_amdhsa_kernel
+
+
+// ASM-LABEL: .amdhsa_kernel user_sgprs_implied_count_private_segment_buffer
+// ASM: .amdhsa_user_sgpr_count 4
+ .amdhsa_kernel user_sgprs_implied_count_private_segment_buffer
+ .amdhsa_user_sgpr_private_segment_buffer 1
+ .amdhsa_next_free_vgpr 32
+ .amdhsa_next_free_sgpr 32
+.end_amdhsa_kernel
+
+
+// ASM-LABEL: .amdhsa_kernel explicit_user_sgpr_count_16
+.amdhsa_kernel explicit_user_sgpr_count_16
+ .amdhsa_user_sgpr_count 16
+ .amdhsa_next_free_vgpr 32
+ .amdhsa_next_free_sgpr 32
+.end_amdhsa_kernel
+
+
+// ASM-LABEL: .amdhsa_kernel explicit_user_sgpr_count_0
+// ASM: .amdhsa_user_sgpr_count 0
+ .amdhsa_kernel explicit_user_sgpr_count_0
+ .amdhsa_user_sgpr_count 0
+ .amdhsa_next_free_vgpr 32
+ .amdhsa_next_free_sgpr 32
+.end_amdhsa_kernel
+
+// ASM-LABEL: .amdhsa_kernel explicit_user_sgpr_count_1
+// ASM: .amdhsa_user_sgpr_count 1
+.amdhsa_kernel explicit_user_sgpr_count_1
+ .amdhsa_user_sgpr_count 1
+ .amdhsa_next_free_vgpr 32
+ .amdhsa_next_free_sgpr 32
+.end_amdhsa_kernel
+
+.amdhsa_kernel explicit_user_sgpr_count_larger_than_implied
+ .amdhsa_user_sgpr_count 12
+ .amdhsa_user_sgpr_private_segment_buffer 1
+ .amdhsa_user_sgpr_queue_ptr 1
+ .amdhsa_user_sgpr_kernarg_segment_ptr 1
+ .amdhsa_next_free_vgpr 32
+ .amdhsa_next_free_sgpr 32
+.end_amdhsa_kernel
More information about the llvm-commits mailing list