[llvm] [AMDGPU] Add SubtargetFeature for dynamic VGPR mode (PR #130030)

Diana Picus via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 18 03:42:22 PDT 2025


https://github.com/rovka updated https://github.com/llvm/llvm-project/pull/130030

>From 5f73d9e21163cd58599c0bef4262e66b773835c9 Mon Sep 17 00:00:00 2001
From: Diana Picus <Diana-Magda.Picus at amd.com>
Date: Mon, 23 Oct 2023 10:36:31 +0200
Subject: [PATCH 1/2] [AMDGPU] Add SubtargetFeature for dynamic VGPR mode

This represents a hardware mode that is supported only for wave32 compute
shaders. When the `dynamic-vgpr` feature is enabled, the `.dynamic_vgpr_en`
field of `.compute_registers` is set to true in the PAL metadata.
---
 llvm/docs/AMDGPUUsage.rst                    |  6 ++++++
 llvm/lib/Target/AMDGPU/AMDGPU.td             |  6 ++++++
 llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp  |  3 +++
 llvm/lib/Target/AMDGPU/GCNSubtarget.h        |  3 +++
 llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll | 13 ++++++++-----
 5 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index def6addd595e8..59cc08a59ed7c 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -758,6 +758,12 @@ For example:
                                                   enabled will execute correctly but may be less
                                                   performant than code generated for XNACK replay
                                                   disabled.
+
+     dynamic-vgpr    TODO                         Represents the "Dynamic VGPR" hardware mode, introduced in GFX12.
+                                                  Waves launched in this mode may allocate or deallocate VGPRs
+                                                  using dedicated instructions, but may not send the DEALLOC_VGPRS
+                                                  message.
+
      =============== ============================ ==================================================
 
 .. _amdgpu-target-id:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 1c8dc09d3060b..fdbabcb62c0bf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1251,6 +1251,12 @@ def FeatureXF32Insts : SubtargetFeature<"xf32-insts",
    "v_mfma_f32_16x16x8_xf32 and v_mfma_f32_32x32x4_xf32"
  >;
 
+def FeatureDynamicVGPR : SubtargetFeature <"dynamic-vgpr",
+  "DynamicVGPR",
+  "true",
+  "Enable dynamic VGPR mode"
+>;
+
 // Dummy feature used to disable assembler instructions.
 def FeatureDisable : SubtargetFeature<"",
   "FeatureDisable","true",
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 31e0bd8d652bc..13e61756e3036 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -1414,6 +1414,9 @@ static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD,
     MD->setHwStage(CC, ".trap_present",
                    (bool)CurrentProgramInfo.TrapHandlerEnable);
     MD->setHwStage(CC, ".excp_en", CurrentProgramInfo.EXCPEnable);
+
+    if (ST.isDynamicVGPREnabled())
+      MD->setComputeRegisters(".dynamic_vgpr_en", true);
   }
 
   MD->setHwStage(CC, ".lds_size",
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index f7f03fe5911bd..651dbad8244cb 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -191,6 +191,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   /// indicates a lack of S_CLAUSE support.
   unsigned MaxHardClauseLength = 0;
   bool SupportsSRAMECC = false;
+  bool DynamicVGPR = false;
 
   // This should not be used directly. 'TargetID' tracks the dynamic settings
   // for SRAMECC.
@@ -1653,6 +1654,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
     return true;
   }
 
+  bool isDynamicVGPREnabled() const { return DynamicVGPR; }
+
   bool requiresDisjointEarlyClobberAndUndef() const override {
     // AMDGPU doesn't care if early-clobber and undef operands are allocated
     // to the same register.
diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll
index 7536e83a9da6b..fa22089978c2e 100644
--- a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll
+++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll
@@ -1,4 +1,6 @@
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 <%s | FileCheck %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 <%s | FileCheck %s --check-prefixes=CHECK,GFX11
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 <%s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+dynamic-vgpr <%s | FileCheck %s --check-prefixes=CHECK,DVGPR
 
 ; CHECK-LABEL: {{^}}_amdgpu_cs_main:
 ; CHECK: ; TotalNumSgprs: 4
@@ -8,6 +10,7 @@
 ; CHECK-NEXT: amdpal.pipelines:
 ; CHECK-NEXT:   - .api:            Vulkan
 ; CHECK-NEXT:     .compute_registers:
+; DVGPR-NEXT:       .dynamic_vgpr_en:   true
 ; CHECK-NEXT:       .tg_size_en:     true
 ; CHECK-NEXT:       .tgid_x_en:      false
 ; CHECK-NEXT:       .tgid_y_en:      false
@@ -57,7 +60,7 @@
 ; CHECK-NEXT:        .entry_point_symbol:    _amdgpu_cs_main
 ; CHECK-NEXT:        .excp_en:        0
 ; CHECK-NEXT:        .float_mode:     0xc0
-; CHECK-NEXT:        .ieee_mode:      false
+; GFX11-NEXT:        .ieee_mode:      false
 ; CHECK-NEXT:        .image_op:       false
 ; CHECK-NEXT:        .lds_size:       0
 ; CHECK-NEXT:        .mem_ordered:    true
@@ -112,7 +115,7 @@
 ; CHECK-NEXT:        .debug_mode:     false
 ; CHECK-NEXT:        .entry_point:    _amdgpu_gs
 ; CHECK-NEXT:        .entry_point_symbol:    gs_shader
-; CHECK-NEXT:        .ieee_mode:      false
+; GFX11-NEXT:        .ieee_mode:      false
 ; CHECK-NEXT:        .lds_size:       0x200
 ; CHECK-NEXT:        .mem_ordered:    true
 ; CHECK-NEXT:        .scratch_en:     false
@@ -124,7 +127,7 @@
 ; CHECK-NEXT:        .debug_mode:     false
 ; CHECK-NEXT:        .entry_point:    _amdgpu_hs
 ; CHECK-NEXT:        .entry_point_symbol:    hs_shader
-; CHECK-NEXT:        .ieee_mode:      false
+; GFX11-NEXT:        .ieee_mode:      false
 ; CHECK-NEXT:        .lds_size:       0x1000
 ; CHECK-NEXT:        .mem_ordered:    true
 ; CHECK-NEXT:        .scratch_en:     false
@@ -136,7 +139,7 @@
 ; CHECK-NEXT:        .debug_mode:     false
 ; CHECK-NEXT:        .entry_point:    _amdgpu_ps
 ; CHECK-NEXT:        .entry_point_symbol:    ps_shader
-; CHECK-NEXT:        .ieee_mode:      false
+; GFX11-NEXT:        .ieee_mode:      false
 ; CHECK-NEXT:        .lds_size:       0
 ; CHECK-NEXT:        .mem_ordered:    true
 ; CHECK-NEXT:        .scratch_en:     false

>From 392776287ec6e6247b02e8d5358c8b9c46ad0448 Mon Sep 17 00:00:00 2001
From: Diana Picus <diana-magda.picus at amd.com>
Date: Tue, 18 Mar 2025 11:06:01 +0100
Subject: [PATCH 2/2] Update test for non-entry func

---
 .../AMDGPU/pal-metadata-3.0-callable.ll       | 22 +++++++++++++------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll
index 538ce15979de8..dd1e717c10486 100644
--- a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll
+++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll
@@ -1,10 +1,13 @@
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,GFX11 %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,GFX12 %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+dynamic-vgpr -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,GFX12,DVGPR %s
 
 ; CHECK:           .amdgpu_pal_metadata
 ; CHECK-NEXT: ---
 ; CHECK-NEXT: amdpal.pipelines:
 ; CHECK-NEXT:  - .api:            Vulkan
 ; CHECK-NEXT:    .compute_registers:
+; DVGPR-NEXT:      .dynamic_vgpr_en:   true
 ; CHECK-NEXT:      .tg_size_en:     true
 ; CHECK-NEXT:      .tgid_x_en:      false
 ; CHECK-NEXT:      .tgid_y_en:      false
@@ -16,7 +19,7 @@
 ; CHECK-NEXT:        .debug_mode:     0
 ; CHECK-NEXT:        .excp_en:        0
 ; CHECK-NEXT:        .float_mode:     0xc0
-; CHECK-NEXT:        .ieee_mode:      true
+; GFX11-NEXT:        .ieee_mode:      true
 ; CHECK-NEXT:        .image_op:       false
 ; CHECK-NEXT:        .lds_size:       0x200
 ; CHECK-NEXT:        .mem_ordered:    true
@@ -98,19 +101,22 @@
 ; CHECK-NEXT:      no_stack_extern_call:
 ; CHECK-NEXT:        .backend_stack_size: 0x10
 ; CHECK-NEXT:        .lds_size:       0
-; CHECK-NEXT:        .sgpr_count:     0x29
+; GFX11-NEXT:        .sgpr_count:     0x29
+; GFX12-NEXT:        .sgpr_count:     0x24
 ; CHECK-NEXT:        .stack_frame_size_in_bytes: 0x10
 ; CHECK-NEXT:        .vgpr_count:     0x58
 ; CHECK-NEXT:      no_stack_extern_call_many_args:
 ; CHECK-NEXT:        .backend_stack_size: 0x90
 ; CHECK-NEXT:        .lds_size:       0
-; CHECK-NEXT:        .sgpr_count:     0x29
+; GFX11-NEXT:        .sgpr_count:     0x29
+; GFX12-NEXT:        .sgpr_count:     0x24
 ; CHECK-NEXT:        .stack_frame_size_in_bytes: 0x90
 ; CHECK-NEXT:        .vgpr_count:     0x58
 ; CHECK-NEXT:      no_stack_indirect_call:
 ; CHECK-NEXT:        .backend_stack_size: 0x10
 ; CHECK-NEXT:        .lds_size:       0
-; CHECK-NEXT:        .sgpr_count:     0x29
+; GFX11-NEXT:        .sgpr_count:     0x29
+; GFX12-NEXT:        .sgpr_count:     0x24
 ; CHECK-NEXT:        .stack_frame_size_in_bytes: 0x10
 ; CHECK-NEXT:        .vgpr_count:     0x58
 ; CHECK-NEXT:      simple_lds:
@@ -140,13 +146,15 @@
 ; CHECK-NEXT:      simple_stack_extern_call:
 ; CHECK-NEXT:        .backend_stack_size: 0x20
 ; CHECK-NEXT:        .lds_size:       0
-; CHECK-NEXT:        .sgpr_count:     0x29
+; GFX11-NEXT:        .sgpr_count:     0x29
+; GFX12-NEXT:        .sgpr_count:     0x24
 ; CHECK-NEXT:        .stack_frame_size_in_bytes: 0x20
 ; CHECK-NEXT:        .vgpr_count:     0x58
 ; CHECK-NEXT:      simple_stack_indirect_call:
 ; CHECK-NEXT:        .backend_stack_size: 0x20
 ; CHECK-NEXT:        .lds_size:       0
-; CHECK-NEXT:        .sgpr_count:     0x29
+; GFX11-NEXT:        .sgpr_count:     0x29
+; GFX12-NEXT:        .sgpr_count:     0x24
 ; CHECK-NEXT:        .stack_frame_size_in_bytes: 0x20
 ; CHECK-NEXT:        .vgpr_count:     0x58
 ; CHECK-NEXT:      simple_stack_recurse:



More information about the llvm-commits mailing list