[llvm] 0a21ef9 - [AMDGPU] Add SubtargetFeature for dynamic VGPR mode (#130030)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 18 03:48:04 PDT 2025
Author: Diana Picus
Date: 2025-03-18T11:48:01+01:00
New Revision: 0a21ef9536e0f591d334b230bd388bcb503e27ec
URL: https://github.com/llvm/llvm-project/commit/0a21ef9536e0f591d334b230bd388bcb503e27ec
DIFF: https://github.com/llvm/llvm-project/commit/0a21ef9536e0f591d334b230bd388bcb503e27ec.diff
LOG: [AMDGPU] Add SubtargetFeature for dynamic VGPR mode (#130030)
This represents a hardware mode supported only for wave32 compute
shaders. When enabled, we set the `.dynamic_vgpr_en` field of
`.compute_registers` to true in the PAL metadata.
This will be changed to use an attribute after downstream consumers
have been migrated.
Added:
Modified:
llvm/docs/AMDGPUUsage.rst
llvm/lib/Target/AMDGPU/AMDGPU.td
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/lib/Target/AMDGPU/GCNSubtarget.h
llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll
llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll
Removed:
################################################################################
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 5c066eb385909..df6e8d5bbd50e 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -758,6 +758,12 @@ For example:
enabled will execute correctly but may be less
performant than code generated for XNACK replay
disabled.
+
+ dynamic-vgpr TODO Represents the "Dynamic VGPR" hardware mode, introduced in GFX12.
+ Waves launched in this mode may allocate or deallocate the VGPRs
+ using dedicated instructions, but may not send the DEALLOC_VGPRS
+ message.
+
=============== ============================ ==================================================
.. _amdgpu-target-id:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 1c8dc09d3060b..fdbabcb62c0bf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1251,6 +1251,12 @@ def FeatureXF32Insts : SubtargetFeature<"xf32-insts",
"v_mfma_f32_16x16x8_xf32 and v_mfma_f32_32x32x4_xf32"
>;
+def FeatureDynamicVGPR : SubtargetFeature <"dynamic-vgpr",
+ "DynamicVGPR",
+ "true",
+ "Enable dynamic VGPR mode"
+>;
+
// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
"FeatureDisable","true",
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index cfa5ae3e3fe59..beffebaaa66d4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -1415,6 +1415,9 @@ static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD,
MD->setHwStage(CC, ".trap_present",
(bool)CurrentProgramInfo.TrapHandlerEnable);
MD->setHwStage(CC, ".excp_en", CurrentProgramInfo.EXCPEnable);
+
+ if (ST.isDynamicVGPREnabled())
+ MD->setComputeRegisters(".dynamic_vgpr_en", true);
}
MD->setHwStage(CC, ".lds_size",
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index f7f03fe5911bd..651dbad8244cb 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -191,6 +191,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// indicates a lack of S_CLAUSE support.
unsigned MaxHardClauseLength = 0;
bool SupportsSRAMECC = false;
+ bool DynamicVGPR = false;
// This should not be used directly. 'TargetID' tracks the dynamic settings
// for SRAMECC.
@@ -1653,6 +1654,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return true;
}
+ bool isDynamicVGPREnabled() const { return DynamicVGPR; }
+
bool requiresDisjointEarlyClobberAndUndef() const override {
// AMDGPU doesn't care if early-clobber and undef operands are allocated
// to the same register.
diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll
index 88d2f6a2e85ef..638dc8965987e 100644
--- a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll
+++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll
@@ -1,10 +1,13 @@
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,GFX11 %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,GFX12 %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+dynamic-vgpr -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,GFX12,DVGPR %s
; CHECK: .amdgpu_pal_metadata
; CHECK-NEXT: ---
; CHECK-NEXT: amdpal.pipelines:
; CHECK-NEXT: - .api: Vulkan
; CHECK-NEXT: .compute_registers:
+; DVGPR-NEXT: .dynamic_vgpr_en: true
; CHECK-NEXT: .tg_size_en: true
; CHECK-NEXT: .tgid_x_en: false
; CHECK-NEXT: .tgid_y_en: false
@@ -16,7 +19,7 @@
; CHECK-NEXT: .debug_mode: 0
; CHECK-NEXT: .excp_en: 0
; CHECK-NEXT: .float_mode: 0xc0
-; CHECK-NEXT: .ieee_mode: true
+; GFX11-NEXT: .ieee_mode: true
; CHECK-NEXT: .image_op: false
; CHECK-NEXT: .lds_size: 0x200
; CHECK-NEXT: .mem_ordered: true
@@ -98,19 +101,22 @@
; CHECK-NEXT: no_stack_extern_call:
; CHECK-NEXT: .backend_stack_size: 0x10
; CHECK-NEXT: .lds_size: 0
-; CHECK-NEXT: .sgpr_count: 0x29
+; GFX11-NEXT: .sgpr_count: 0x29
+; GFX12-NEXT: .sgpr_count: 0x24
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x10
; CHECK-NEXT: .vgpr_count: 0x58
; CHECK-NEXT: no_stack_extern_call_many_args:
; CHECK-NEXT: .backend_stack_size: 0x90
; CHECK-NEXT: .lds_size: 0
-; CHECK-NEXT: .sgpr_count: 0x29
+; GFX11-NEXT: .sgpr_count: 0x29
+; GFX12-NEXT: .sgpr_count: 0x24
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x90
; CHECK-NEXT: .vgpr_count: 0x58
; CHECK-NEXT: no_stack_indirect_call:
; CHECK-NEXT: .backend_stack_size: 0x10
; CHECK-NEXT: .lds_size: 0
-; CHECK-NEXT: .sgpr_count: 0x29
+; GFX11-NEXT: .sgpr_count: 0x29
+; GFX12-NEXT: .sgpr_count: 0x24
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x10
; CHECK-NEXT: .vgpr_count: 0x58
; CHECK-NEXT: simple_lds:
@@ -140,13 +146,15 @@
; CHECK-NEXT: simple_stack_extern_call:
; CHECK-NEXT: .backend_stack_size: 0x20
; CHECK-NEXT: .lds_size: 0
-; CHECK-NEXT: .sgpr_count: 0x29
+; GFX11-NEXT: .sgpr_count: 0x29
+; GFX12-NEXT: .sgpr_count: 0x24
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x20
; CHECK-NEXT: .vgpr_count: 0x58
; CHECK-NEXT: simple_stack_indirect_call:
; CHECK-NEXT: .backend_stack_size: 0x20
; CHECK-NEXT: .lds_size: 0
-; CHECK-NEXT: .sgpr_count: 0x29
+; GFX11-NEXT: .sgpr_count: 0x29
+; GFX12-NEXT: .sgpr_count: 0x24
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x20
; CHECK-NEXT: .vgpr_count: 0x58
; CHECK-NEXT: simple_stack_recurse:
diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll
index 7536e83a9da6b..fa22089978c2e 100644
--- a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll
+++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll
@@ -1,4 +1,6 @@
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 <%s | FileCheck %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 <%s | FileCheck %s --check-prefixes=CHECK,GFX11
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 <%s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+dynamic-vgpr <%s | FileCheck %s --check-prefixes=CHECK,DVGPR
; CHECK-LABEL: {{^}}_amdgpu_cs_main:
; CHECK: ; TotalNumSgprs: 4
@@ -8,6 +10,7 @@
; CHECK-NEXT: amdpal.pipelines:
; CHECK-NEXT: - .api: Vulkan
; CHECK-NEXT: .compute_registers:
+; DVGPR-NEXT: .dynamic_vgpr_en: true
; CHECK-NEXT: .tg_size_en: true
; CHECK-NEXT: .tgid_x_en: false
; CHECK-NEXT: .tgid_y_en: false
@@ -57,7 +60,7 @@
; CHECK-NEXT: .entry_point_symbol: _amdgpu_cs_main
; CHECK-NEXT: .excp_en: 0
; CHECK-NEXT: .float_mode: 0xc0
-; CHECK-NEXT: .ieee_mode: false
+; GFX11-NEXT: .ieee_mode: false
; CHECK-NEXT: .image_op: false
; CHECK-NEXT: .lds_size: 0
; CHECK-NEXT: .mem_ordered: true
@@ -112,7 +115,7 @@
; CHECK-NEXT: .debug_mode: false
; CHECK-NEXT: .entry_point: _amdgpu_gs
; CHECK-NEXT: .entry_point_symbol: gs_shader
-; CHECK-NEXT: .ieee_mode: false
+; GFX11-NEXT: .ieee_mode: false
; CHECK-NEXT: .lds_size: 0x200
; CHECK-NEXT: .mem_ordered: true
; CHECK-NEXT: .scratch_en: false
@@ -124,7 +127,7 @@
; CHECK-NEXT: .debug_mode: false
; CHECK-NEXT: .entry_point: _amdgpu_hs
; CHECK-NEXT: .entry_point_symbol: hs_shader
-; CHECK-NEXT: .ieee_mode: false
+; GFX11-NEXT: .ieee_mode: false
; CHECK-NEXT: .lds_size: 0x1000
; CHECK-NEXT: .mem_ordered: true
; CHECK-NEXT: .scratch_en: false
@@ -136,7 +139,7 @@
; CHECK-NEXT: .debug_mode: false
; CHECK-NEXT: .entry_point: _amdgpu_ps
; CHECK-NEXT: .entry_point_symbol: ps_shader
-; CHECK-NEXT: .ieee_mode: false
+; GFX11-NEXT: .ieee_mode: false
; CHECK-NEXT: .lds_size: 0
; CHECK-NEXT: .mem_ordered: true
; CHECK-NEXT: .scratch_en: false
More information about the llvm-commits
mailing list