[llvm] d96361d - [AMDGPU] Add the uses_dynamic_stack field to the kernel descriptor and the kernel metadata map

Abinav Puthan Purayil via llvm-commits llvm-commits at lists.llvm.org
Sun Jul 17 21:37:48 PDT 2022


Author: Abinav Puthan Purayil
Date: 2022-07-18T10:07:13+05:30
New Revision: d96361d714ee0b646147b2d29575cffb6648c46b

URL: https://github.com/llvm/llvm-project/commit/d96361d714ee0b646147b2d29575cffb6648c46b
DIFF: https://github.com/llvm/llvm-project/commit/d96361d714ee0b646147b2d29575cffb6648c46b.diff

LOG: [AMDGPU] Add the uses_dynamic_stack field to the kernel descriptor and the kernel metadata map

This change introduces the dynamic stack boolean field to code-object-v3
and above under the code properties of the kernel descriptor and under
the kernel metadata map of NT_AMDGPU_METADATA. This field corresponds to
the is_dynamic_callstack field of amd_kernel_code_t.

Differential Revision: https://reviews.llvm.org/D128344

Added: 
    

Modified: 
    llvm/docs/AMDGPUUsage.rst
    llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
    llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
    llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
    llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
    llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
    llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
    llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
    llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll
    llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
    llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
    llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
    llvm/test/MC/AMDGPU/hsa-gfx11-v3.s
    llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s
    llvm/test/MC/AMDGPU/hsa-gfx940-v3.s
    llvm/test/MC/AMDGPU/hsa-metadata-kernel-args-v3.s
    llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v3.s
    llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props-v3.s
    llvm/test/MC/AMDGPU/hsa-v3.s
    llvm/test/MC/AMDGPU/hsa-v4.s
    llvm/test/tools/llvm-readobj/ELF/note-amd-valid-v3.s
    llvm/test/tools/llvm-readobj/ELF/note-amdgpu.test

Removed: 
    


################################################################################
diff  --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 26cba99e9b371..f591e60862fbe 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -3219,6 +3219,9 @@ same *vendor-name*.
                                                                   arguments in the
                                                                   kernarg segment. Must
                                                                   be a power of 2.
+     ".uses_dynamic_stack"               boolean                  Indicates if the generated
+                                                                  machine code is using a
+                                                                  dynamically sized stack.
      ".wavefront_size"                   integer        Required  Wavefront size. Must
                                                                   be a power of 2.
      ".sgpr_count"                       integer        Required  Number of scalar
@@ -3997,7 +4000,10 @@ The fields used by CP for code objects before V3 also match those specified in
                                                        - If 1 execute in
                                                          native wavefront size
                                                          32 mode.
-     463:459 1 bit                                   Reserved, must be 0.
+     459     1 bit   USES_DYNAMIC_STACK              Indicates if the generated
+                                                     machine code is using a
+                                                     dynamically sized stack.
+     463:460 1 bit                                   Reserved, must be 0.
      464     1 bit   RESERVED_464                    Deprecated, must be 0.
      467:465 3 bits                                  Reserved, must be 0.
      468     1 bit   RESERVED_468                    Deprecated, must be 0.
@@ -14847,6 +14853,8 @@ terminated by an ``.end_amdhsa_kernel`` directive.
                                                               Feature                          :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
                                                               Specific
                                                               (wavefrontsize64)
+     ``.amdhsa_uses_dynamic_stack``                           0                   GFX6-GFX11   Controls USES_DYNAMIC_STACK in
+                                                                                               :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
      ``.amdhsa_system_sgpr_private_segment_wavefront_offset`` 0                   GFX6-GFX10   Controls ENABLE_PRIVATE_SEGMENT in
                                                                                   (except      :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx11-table`.
                                                                                   GFX940)

diff  --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
index 41d144cfd5c41..61b05743faf62 100644
--- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
+++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
@@ -161,7 +161,8 @@ enum : int32_t {
   KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
   KERNEL_CODE_PROPERTY(RESERVED0, 7, 3),
   KERNEL_CODE_PROPERTY(ENABLE_WAVEFRONT_SIZE32, 10, 1), // GFX10+
-  KERNEL_CODE_PROPERTY(RESERVED1, 11, 5),
+  KERNEL_CODE_PROPERTY(USES_DYNAMIC_STACK, 11, 1),
+  KERNEL_CODE_PROPERTY(RESERVED1, 12, 4),
 };
 #undef KERNEL_CODE_PROPERTY
 

diff  --git a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
index 1613e7e42a0af..c5ab35d948609 100644
--- a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
+++ b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
@@ -260,6 +260,9 @@ bool MetadataVerifier::verifyKernel(msgpack::DocNode &Node) {
     return false;
   if (!verifyIntegerEntry(KernelMap, ".private_segment_fixed_size", true))
     return false;
+  if (!verifyScalarEntry(KernelMap, ".uses_dynamic_stack", false,
+                         msgpack::Type::Boolean))
+    return false;
   if (!verifyIntegerEntry(KernelMap, ".kernarg_segment_align", true))
     return false;
   if (!verifyIntegerEntry(KernelMap, ".wavefront_size", true))

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index f1cc40b3a69af..13a65f1ad6017 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -417,6 +417,10 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
         amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
   }
 
+  if (CurrentProgramInfo.DynamicCallStack) {
+    KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK;
+  }
+
   return KernelCodeProperties;
 }
 

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 6fa44ffcbfaa4..632a76b32009b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -875,6 +875,8 @@ MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
       Kern.getDocument()->getNode(ProgramInfo.LDSSize);
   Kern[".private_segment_fixed_size"] =
       Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
+  Kern[".uses_dynamic_stack"] =
+      Kern.getDocument()->getNode(ProgramInfo.DynamicCallStack);
 
   // FIXME: The metadata treats the minimum as 16?
   Kern[".kernarg_segment_align"] =

diff  --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index d38182b0447eb..201a7ff22c35a 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5001,6 +5001,9 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                        Val, ValRange);
+    } else if (ID == ".amdhsa_uses_dynamic_stack") {
+      PARSE_BITS_ENTRY(KD.kernel_code_properties,
+                       KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
       if (hasArchitectedFlatScratch())
         return Error(IDRange.Start,

diff  --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 0f7545cb07db3..98ee720200b43 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -2040,6 +2040,9 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective(
                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
     }
 
+    PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
+                    KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
+
     if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1)
       return MCDisassembler::Fail;
 

diff  --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 0781334695491..0e71509cf2bd2 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -367,6 +367,8 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
     PRINT_FIELD(OS, ".amdhsa_wavefront_size32", KD,
                 kernel_code_properties,
                 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
+  PRINT_FIELD(OS, ".amdhsa_uses_dynamic_stack", KD, kernel_code_properties,
+              amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
   PRINT_FIELD(OS,
               (hasArchitectedFlatScratch(STI)
                    ? ".amdhsa_enable_private_segment"

diff  --git a/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll b/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll
index 831d94b76770f..178b2460e1683 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll
@@ -21,6 +21,7 @@
 ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0
 ; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
 ; GCN-NEXT: .amdhsa_wavefront_size32 1
+; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
 ; GCN-NEXT: .amdhsa_enable_private_segment 0
 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
@@ -46,6 +47,7 @@ define amdgpu_kernel void @minimal_kernel_inputs() {
 ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0
 ; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
 ; GCN-NEXT: .amdhsa_wavefront_size32 1
+; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
 ; GCN-NEXT: .amdhsa_enable_private_segment 1
 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
@@ -75,6 +77,7 @@ define amdgpu_kernel void @minimal_kernel_inputs_with_stack() {
 ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0
 ; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
 ; GCN-NEXT: .amdhsa_wavefront_size32 1
+; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
 ; GCN-NEXT: .amdhsa_enable_private_segment 0
 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
@@ -115,6 +118,7 @@ define amdgpu_kernel void @queue_ptr() {
 ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 1
 ; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
 ; GCN-NEXT: .amdhsa_wavefront_size32 1
+; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
 ; GCN-NEXT: .amdhsa_enable_private_segment 1
 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1

diff  --git a/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll b/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
index aebbe33fd236c..8e93f3a139b49 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
@@ -65,6 +65,7 @@ define amdgpu_kernel void @indirect_call_known_no_special_inputs() {
 ; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
 ; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
 ; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 1
 ; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
 ; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
 ; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0

diff  --git a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
index 7cc2b8214a36d..a83a2e759a5af 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
@@ -26,6 +26,7 @@ define amdgpu_kernel void @max_alignment_128() #0 {
 ; VI-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
 ; VI-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
 ; VI-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
+; VI-NEXT:     .amdhsa_uses_dynamic_stack 0
 ; VI-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
 ; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
 ; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
@@ -74,6 +75,7 @@ define amdgpu_kernel void @max_alignment_128() #0 {
 ; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
 ; GFX9-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
 ; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
+; GFX9-NEXT:     .amdhsa_uses_dynamic_stack 0
 ; GFX9-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
 ; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
 ; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
@@ -129,6 +131,7 @@ define amdgpu_kernel void @stackrealign_attr() #1 {
 ; VI-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
 ; VI-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
 ; VI-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
+; VI-NEXT:     .amdhsa_uses_dynamic_stack 0
 ; VI-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
 ; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
 ; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
@@ -177,6 +180,7 @@ define amdgpu_kernel void @stackrealign_attr() #1 {
 ; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
 ; GFX9-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
 ; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
+; GFX9-NEXT:     .amdhsa_uses_dynamic_stack 0
 ; GFX9-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
 ; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
 ; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
@@ -232,6 +236,7 @@ define amdgpu_kernel void @alignstack_attr() #2 {
 ; VI-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
 ; VI-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
 ; VI-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
+; VI-NEXT:     .amdhsa_uses_dynamic_stack 0
 ; VI-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
 ; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
 ; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
@@ -280,6 +285,7 @@ define amdgpu_kernel void @alignstack_attr() #2 {
 ; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
 ; GFX9-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
 ; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
+; GFX9-NEXT:     .amdhsa_uses_dynamic_stack 0
 ; GFX9-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
 ; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
 ; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0

diff  --git a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
index ba60000837cdc..32281c97c2631 100644
--- a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
+++ b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
@@ -31,7 +31,7 @@
 // OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
 // OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0070 015001e4 1f0f007f 7f040000 00000000
+// OBJDUMP-NEXT: 0070 015001e4 1f0f007f 7f0c0000 00000000
 // special_sgpr
 // OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
 // OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
@@ -91,6 +91,7 @@ special_sgpr:
   .amdhsa_user_sgpr_flat_scratch_init 1
   .amdhsa_user_sgpr_private_segment_size 1
   .amdhsa_wavefront_size32 1
+  .amdhsa_uses_dynamic_stack 1
   .amdhsa_system_sgpr_private_segment_wavefront_offset 1
   .amdhsa_system_sgpr_workgroup_id_x 0
   .amdhsa_system_sgpr_workgroup_id_y 1
@@ -134,6 +135,7 @@ special_sgpr:
 // ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
 // ASM-NEXT: .amdhsa_wavefront_size32 1
+// ASM-NEXT: .amdhsa_uses_dynamic_stack 1
 // ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1

diff  --git a/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s
index 7f885b457aa63..ca0783f0ede02 100644
--- a/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s
+++ b/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s
@@ -31,7 +31,7 @@
 // OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
 // OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0070 015001e4 130f007f 5e040000 00000000
+// OBJDUMP-NEXT: 0070 015001e4 130f007f 5e0c0000 00000000
 // special_sgpr
 // OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
 // OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
@@ -87,6 +87,7 @@ special_sgpr:
   .amdhsa_user_sgpr_dispatch_id 1
   .amdhsa_user_sgpr_private_segment_size 1
   .amdhsa_wavefront_size32 1
+  .amdhsa_uses_dynamic_stack 1
   .amdhsa_enable_private_segment 1
   .amdhsa_system_sgpr_workgroup_id_x 0
   .amdhsa_system_sgpr_workgroup_id_y 1
@@ -126,6 +127,7 @@ special_sgpr:
 // ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
 // ASM-NEXT: .amdhsa_wavefront_size32 1
+// ASM-NEXT: .amdhsa_uses_dynamic_stack 1
 // ASM-NEXT: .amdhsa_enable_private_segment 1
 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1

diff  --git a/llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s
index 6ab70e5c72b2f..4c107630ee6e9 100644
--- a/llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s
+++ b/llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s
@@ -28,7 +28,7 @@
 // OBJDUMP-NEXT: 0040 01000000 01000000 00000000 00000000
 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
 // OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000100
-// OBJDUMP-NEXT: 0070 c1500104 1f0f007f 7f000000 00000000
+// OBJDUMP-NEXT: 0070 c1500104 1f0f007f 7f080000 00000000
 
 .text
 // ASM: .text
@@ -77,6 +77,7 @@ complete:
   .amdhsa_user_sgpr_dispatch_id 1
   .amdhsa_user_sgpr_flat_scratch_init 1
   .amdhsa_user_sgpr_private_segment_size 1
+  .amdhsa_uses_dynamic_stack 1
   .amdhsa_system_sgpr_private_segment_wavefront_offset 1
   .amdhsa_system_sgpr_workgroup_id_x 0
   .amdhsa_system_sgpr_workgroup_id_y 1
@@ -117,6 +118,7 @@ complete:
 // ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
 // ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
+// ASM-NEXT: .amdhsa_uses_dynamic_stack 1
 // ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1

diff  --git a/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s
index bf2e6081a8050..46a730330dd13 100644
--- a/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s
+++ b/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s
@@ -28,7 +28,7 @@
 // OBJDUMP-NEXT: 0040 01000000 01000000 00000000 00000000
 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
 // OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000100
-// OBJDUMP-NEXT: 0070 01510104 130f007f 5e000000 00000000
+// OBJDUMP-NEXT: 0070 01510104 130f007f 5e080000 00000000
 
 .text
 // ASM: .text
@@ -75,6 +75,7 @@ complete:
   .amdhsa_user_sgpr_kernarg_segment_ptr 1
   .amdhsa_user_sgpr_dispatch_id 1
   .amdhsa_user_sgpr_private_segment_size 1
+  .amdhsa_uses_dynamic_stack 1
   .amdhsa_enable_private_segment 1
   .amdhsa_system_sgpr_workgroup_id_x 0
   .amdhsa_system_sgpr_workgroup_id_y 1
@@ -112,6 +113,7 @@ complete:
 // ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
 // ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
+// ASM-NEXT: .amdhsa_uses_dynamic_stack 1
 // ASM-NEXT: .amdhsa_enable_private_segment 1
 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1

diff  --git a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-args-v3.s b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-args-v3.s
index 7da24a9d42ad4..771f37dd5b85c 100644
--- a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-args-v3.s
+++ b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-args-v3.s
@@ -39,6 +39,7 @@
 ; CHECK-NEXT:     .private_segment_fixed_size: 32
 ; CHECK-NEXT:     .sgpr_count:     14
 ; CHECK-NEXT:     .symbol:         'test_kernel at kd'
+; CHECK-NEXT:     .uses_dynamic_stack: true
 ; CHECK-NEXT:     .vgpr_count:     40
 ; CHECK-NEXT:     .wavefront_size: 128
 ; CHECK-NEXT: amdhsa.printf:   
@@ -65,6 +66,7 @@
       .kernarg_segment_size: 8
       .group_segment_fixed_size: 16
       .private_segment_fixed_size: 32
+      .uses_dynamic_stack: true
       .kernarg_segment_align: 64
       .wavefront_size: 128
       .sgpr_count: 14

diff  --git a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v3.s b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v3.s
index 602b4566d7f25..6cdd21557cfcb 100644
--- a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v3.s
+++ b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v3.s
@@ -20,6 +20,7 @@
 // CHECK-NEXT:       - 4
 // CHECK:          .sgpr_count:     14
 // CHECK:          .symbol:         'test_kernel at kd'
+// CHECK:          .uses_dynamic_stack: true
 // CHECK:          .vec_type_hint:  int
 // CHECK:          .vgpr_count:     40
 // CHECK:          .wavefront_size: 128
@@ -51,6 +52,7 @@
       .kernarg_segment_size: 8
       .group_segment_fixed_size: 16
       .private_segment_fixed_size: 32
+      .uses_dynamic_stack: true
       .kernarg_segment_align: 64
       .wavefront_size: 128
       .sgpr_count: 14

diff  --git a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props-v3.s b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props-v3.s
index cad60ea0d6d4a..064ccb3a75f9e 100644
--- a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props-v3.s
+++ b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props-v3.s
@@ -13,6 +13,7 @@
 // CHECK:          .sgpr_count:     40
 // CHECK:          .sgpr_spill_count: 1
 // CHECK:          .symbol:         'test_kernel at kd'
+// CHECK:          .uses_dynamic_stack: true
 // CHECK:          .vgpr_count:     14
 // CHECK:          .vgpr_spill_count: 1
 // CHECK:          .wavefront_size: 64
@@ -32,6 +33,7 @@
       .kernarg_segment_size:      24
       .group_segment_fixed_size:   24
       .private_segment_fixed_size: 16
+      .uses_dynamic_stack: true
       .kernarg_segment_align:     16
       .wavefront_size:           64
       .max_flat_workgroup_size:    256

diff  --git a/llvm/test/MC/AMDGPU/hsa-v3.s b/llvm/test/MC/AMDGPU/hsa-v3.s
index 9f854986d7bc4..d9dbd5988d2e8 100644
--- a/llvm/test/MC/AMDGPU/hsa-v3.s
+++ b/llvm/test/MC/AMDGPU/hsa-v3.s
@@ -34,7 +34,7 @@
 // OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
 // OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f000000 00000000
+// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f080000 00000000
 // special_sgpr
 // OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
 // OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
@@ -101,6 +101,7 @@ disabled_user_sgpr:
   .amdhsa_user_sgpr_dispatch_id 1
   .amdhsa_user_sgpr_flat_scratch_init 1
   .amdhsa_user_sgpr_private_segment_size 1
+  .amdhsa_uses_dynamic_stack 1
   .amdhsa_system_sgpr_private_segment_wavefront_offset 1
   .amdhsa_system_sgpr_workgroup_id_x 0
   .amdhsa_system_sgpr_workgroup_id_y 1
@@ -140,6 +141,7 @@ disabled_user_sgpr:
 // ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
 // ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
+// ASM-NEXT: .amdhsa_uses_dynamic_stack 1
 // ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
@@ -259,6 +261,7 @@ v_mov_b32_e32 v16, s3
       .kernarg_segment_size: 8
       .group_segment_fixed_size: 16
       .private_segment_fixed_size: 32
+      .uses_dynamic_stack: true
       .kernarg_segment_align: 64
       .wavefront_size: 128
       .sgpr_count: 14
@@ -269,6 +272,7 @@ v_mov_b32_e32 v16, s3
       .kernarg_segment_size: 8
       .group_segment_fixed_size: 16
       .private_segment_fixed_size: 32
+      .uses_dynamic_stack: true
       .kernarg_segment_align: 64
       .wavefront_size: 128
       .sgpr_count: 14
@@ -286,6 +290,7 @@ v_mov_b32_e32 v16, s3
 // ASM:          .private_segment_fixed_size: 32
 // ASM:          .sgpr_count:     14
 // ASM:          .symbol:         'amd_kernel_code_t_test_all at kd'
+// ASM:          .uses_dynamic_stack: true
 // ASM:          .vgpr_count:     40
 // ASM:          .wavefront_size: 128
 // ASM:        - .group_segment_fixed_size: 16
@@ -296,6 +301,7 @@ v_mov_b32_e32 v16, s3
 // ASM:          .private_segment_fixed_size: 32
 // ASM:          .sgpr_count:     14
 // ASM:          .symbol:         'amd_kernel_code_t_minimal at kd'
+// ASM:          .uses_dynamic_stack: true
 // ASM:          .vgpr_count:     40
 // ASM:          .wavefront_size: 128
 // ASM:      amdhsa.version:

diff  --git a/llvm/test/MC/AMDGPU/hsa-v4.s b/llvm/test/MC/AMDGPU/hsa-v4.s
index 6a824b8bcc7b9..79dd9a06b0f03 100644
--- a/llvm/test/MC/AMDGPU/hsa-v4.s
+++ b/llvm/test/MC/AMDGPU/hsa-v4.s
@@ -34,7 +34,7 @@
 // OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
 // OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f000000 00000000
+// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f080000 00000000
 // special_sgpr
 // OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
 // OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
@@ -102,6 +102,7 @@ disabled_user_sgpr:
   .amdhsa_user_sgpr_dispatch_id 1
   .amdhsa_user_sgpr_flat_scratch_init 1
   .amdhsa_user_sgpr_private_segment_size 1
+  .amdhsa_uses_dynamic_stack 1
   .amdhsa_system_sgpr_private_segment_wavefront_offset 1
   .amdhsa_system_sgpr_workgroup_id_x 0
   .amdhsa_system_sgpr_workgroup_id_y 1
@@ -141,6 +142,7 @@ disabled_user_sgpr:
 // ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
 // ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
+// ASM-NEXT: .amdhsa_uses_dynamic_stack 1
 // ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
@@ -260,6 +262,7 @@ v_mov_b32_e32 v16, s3
       .kernarg_segment_size: 8
       .group_segment_fixed_size: 16
       .private_segment_fixed_size: 32
+      .uses_dynamic_stack: true
       .kernarg_segment_align: 64
       .wavefront_size: 128
       .sgpr_count: 14
@@ -269,6 +272,7 @@ v_mov_b32_e32 v16, s3
       .symbol: amd_kernel_code_t_minimal at kd
       .kernarg_segment_size: 8
       .group_segment_fixed_size: 16
+      .uses_dynamic_stack: true
       .private_segment_fixed_size: 32
       .kernarg_segment_align: 64
       .wavefront_size: 128
@@ -287,6 +291,7 @@ v_mov_b32_e32 v16, s3
 // ASM:          .private_segment_fixed_size: 32
 // ASM:          .sgpr_count:     14
 // ASM:          .symbol:         'amd_kernel_code_t_test_all at kd'
+// ASM:          .uses_dynamic_stack: true
 // ASM:          .vgpr_count:     40
 // ASM:          .wavefront_size: 128
 // ASM:        - .group_segment_fixed_size: 16
@@ -297,6 +302,7 @@ v_mov_b32_e32 v16, s3
 // ASM:          .private_segment_fixed_size: 32
 // ASM:          .sgpr_count:     14
 // ASM:          .symbol:         'amd_kernel_code_t_minimal at kd'
+// ASM:          .uses_dynamic_stack: true
 // ASM:          .vgpr_count:     40
 // ASM:          .wavefront_size: 128
 // ASM:      amdhsa.version:

diff  --git a/llvm/test/tools/llvm-readobj/ELF/note-amd-valid-v3.s b/llvm/test/tools/llvm-readobj/ELF/note-amd-valid-v3.s
index 73022c3c89f78..a84b2abcb1f47 100644
--- a/llvm/test/tools/llvm-readobj/ELF/note-amd-valid-v3.s
+++ b/llvm/test/tools/llvm-readobj/ELF/note-amd-valid-v3.s
@@ -10,10 +10,10 @@
 #LLVM-NEXT:    NoteSection {
 #LLVM-NEXT:      Name: .note
 #LLVM-NEXT:      Offset: 0x40
-#LLVM-NEXT:      Size: 0x110
+#LLVM-NEXT:      Size: 0x128
 #LLVM-NEXT:      Note {
 #LLVM-NEXT:        Owner: AMDGPU
-#LLVM-NEXT:        Data size: 0xFC
+#LLVM-NEXT:        Data size: 0x111
 #LLVM-NEXT:        Type: NT_AMDGPU_METADATA (AMDGPU Metadata)
 #LLVM-NEXT:        AMDGPU Metadata: ---
 #LLVM-NEXT:  amdhsa.kernels:
@@ -25,6 +25,7 @@
 #LLVM-NEXT:      .private_segment_fixed_size: 32
 #LLVM-NEXT:      .sgpr_count:     14
 #LLVM-NEXT:      .symbol:         'test_kernel at kd'
+#LLVM-NEXT:      .uses_dynamic_stack: true
 #LLVM-NEXT:      .vgpr_count:     40
 #LLVM-NEXT:      .wavefront_size: 128
 #LLVM-NEXT:  amdhsa.version:
@@ -38,7 +39,7 @@
 
 # GNU:      Displaying notes found in: .note
 # GNU-NEXT:   Owner                Data size        Description
-# GNU-NEXT:   AMDGPU               0x000000fc       NT_AMDGPU_METADATA (AMDGPU Metadata)
+# GNU-NEXT:   AMDGPU               0x00000111       NT_AMDGPU_METADATA (AMDGPU Metadata)
 # GNU-NEXT:     AMDGPU Metadata:
 # GNU-NEXT:         ---
 # GNU-NEXT: amdhsa.kernels:
@@ -50,6 +51,7 @@
 # GNU-NEXT:     .private_segment_fixed_size: 32
 # GNU-NEXT:     .sgpr_count:     14
 # GNU-NEXT:     .symbol:         'test_kernel at kd'
+# GNU-NEXT:     .uses_dynamic_stack: true
 # GNU-NEXT:     .vgpr_count:     40
 # GNU-NEXT:     .wavefront_size: 128
 # GNU-NEXT: amdhsa.version:
@@ -69,6 +71,7 @@
       .kernarg_segment_size:       8
       .max_flat_workgroup_size:    256
       .private_segment_fixed_size: 32
+      .uses_dynamic_stack: true
       .sgpr_count:                 14
       .vgpr_count:                 40
       .wavefront_size:             128

diff  --git a/llvm/test/tools/llvm-readobj/ELF/note-amdgpu.test b/llvm/test/tools/llvm-readobj/ELF/note-amdgpu.test
index f2d89d103a3dc..5d4f41ced9b16 100644
--- a/llvm/test/tools/llvm-readobj/ELF/note-amdgpu.test
+++ b/llvm/test/tools/llvm-readobj/ELF/note-amdgpu.test
@@ -4,7 +4,7 @@
 
 # GNU:      Displaying notes found in: .note.foo
 # GNU-NEXT:   Owner                Data size        Description
-# GNU-NEXT:   AMDGPU               0x000000e6       NT_AMDGPU_METADATA (AMDGPU Metadata)
+# GNU-NEXT:   AMDGPU               0x000000fb       NT_AMDGPU_METADATA (AMDGPU Metadata)
 # GNU-NEXT:     AMDGPU Metadata:
 # GNU-NEXT:         ---
 # GNU-NEXT: amdhsa.kernels:
@@ -16,6 +16,7 @@
 # GNU-NEXT:     .private_segment_fixed_size: 3
 # GNU-NEXT:     .sgpr_count:     6
 # GNU-NEXT:     .symbol:         foo
+# GNU-NEXT:     .uses_dynamic_stack: true
 # GNU-NEXT:     .vgpr_count:     7
 # GNU-NEXT:     .wavefront_size: 5
 # GNU-NEXT: amdhsa.version:
@@ -37,7 +38,7 @@
 # LLVM-NEXT:     Size:
 # LLVM-NEXT:     Note {
 # LLVM-NEXT:       Owner: AMDGPU
-# LLVM-NEXT:       Data size: 0xE6
+# LLVM-NEXT:       Data size: 0xFB
 # LLVM-NEXT:       Type: NT_AMDGPU_METADATA (AMDGPU Metadata)
 # LLVM-NEXT:       AMDGPU Metadata: ---
 # LLVM-NEXT: amdhsa.kernels:
@@ -49,6 +50,7 @@
 # LLVM-NEXT:     .private_segment_fixed_size: 3
 # LLVM-NEXT:     .sgpr_count:     6
 # LLVM-NEXT:     .symbol:         foo
+# LLVM-NEXT:     .uses_dynamic_stack: true
 # LLVM-NEXT:     .vgpr_count:     7
 # LLVM-NEXT:     .wavefront_size: 5
 # LLVM-NEXT: amdhsa.version:
@@ -60,7 +62,7 @@
 # LLVM-NEXT:   }
 # LLVM-NEXT:   NoteSection {
 # LLVM-NEXT:     Name: .note.unknown
-# LLVM-NEXT:     Offset: 0x13C
+# LLVM-NEXT:     Offset: 0x150
 # LLVM-NEXT:     Size: 0x18
 # LLVM-NEXT:     Note {
 # LLVM-NEXT:       Owner: AMDGPU
@@ -87,12 +89,17 @@
 #       .kernarg_segment_size: 1
 #       .group_segment_fixed_size: 2
 #       .private_segment_fixed_size: 3
+#       .uses_dynamic_stack: true
 #       .kernarg_segment_align: 4
 #       .wavefront_size: 5
 #       .sgpr_count: 6
 #       .vgpr_count: 7
 #       .max_flat_workgroup_size: 8
 # .end_amdgpu_metadata
+#
+## Here's one way to get the contents of .note.foo in the test input from %t.o:
+# $ llvm-objcopy -O binary --only-section=.note %t.o note.out
+# $ xxd -p note.out | tr -d '\n' | tr a-z A-Z
 
 --- !ELF
 FileHeader:
@@ -102,7 +109,7 @@ FileHeader:
 Sections:
   - Name:        .note.foo
     Type:        SHT_NOTE
-    Content:     07000000E600000020000000414D44475055000082AE616D646873612E6B65726E656C73918AB92E67726F75705F7365676D656E745F66697865645F73697A6502B62E6B65726E6172675F7365676D656E745F616C69676E04B52E6B65726E6172675F7365676D656E745F73697A6501B82E6D61785F666C61745F776F726B67726F75705F73697A6508A52E6E616D65A3666F6FBB2E707269766174655F7365676D656E745F66697865645F73697A6503AB2E736770725F636F756E7406A72E73796D626F6CA3666F6FAB2E766770725F636F756E7407AF2E7761766566726F6E745F73697A6505AE616D646873612E76657273696F6E9201000000
+    Content:     07000000FB00000020000000414D44475055000082AE616D646873612E6B65726E656C73918BB92E67726F75705F7365676D656E745F66697865645F73697A6502B62E6B65726E6172675F7365676D656E745F616C69676E04B52E6B65726E6172675F7365676D656E745F73697A6501B82E6D61785F666C61745F776F726B67726F75705F73697A6508A52E6E616D65A3666F6FBB2E707269766174655F7365676D656E745F66697865645F73697A6503AB2E736770725F636F756E7406A72E73796D626F6CA3666F6FB32E757365735F64796E616D69635F737461636BC3AB2E766770725F636F756E7407AF2E7761766566726F6E745F73697A6505AE616D646873612E76657273696F6E92010000
   - Name:        .note.unknown
     Type:        SHT_NOTE
     Notes:


        


More information about the llvm-commits mailing list