[llvm] [AMDGPU] Add SubtargetFeature for dynamic VGPR mode (PR #130030)
Diana Picus via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 18 03:42:22 PDT 2025
https://github.com/rovka updated https://github.com/llvm/llvm-project/pull/130030
>From 5f73d9e21163cd58599c0bef4262e66b773835c9 Mon Sep 17 00:00:00 2001
From: Diana Picus <Diana-Magda.Picus at amd.com>
Date: Mon, 23 Oct 2023 10:36:31 +0200
Subject: [PATCH 1/2] [AMDGPU] Add SubtargetFeature for dynamic VGPR mode
Add a subtarget feature representing the dynamic VGPR hardware mode, which
is supported only for wave32 compute shaders. When the feature is enabled,
the `.dynamic_vgpr_en` field of `.compute_registers` is set to true in the PAL metadata.
---
llvm/docs/AMDGPUUsage.rst | 6 ++++++
llvm/lib/Target/AMDGPU/AMDGPU.td | 6 ++++++
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 3 +++
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 3 +++
llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll | 13 ++++++++-----
5 files changed, 26 insertions(+), 5 deletions(-)
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index def6addd595e8..59cc08a59ed7c 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -758,6 +758,12 @@ For example:
enabled will execute correctly but may be less
performant than code generated for XNACK replay
disabled.
+
+ dynamic-vgpr TODO Represents the "Dynamic VGPR" hardware mode, introduced in GFX12.
+ Waves launched in this mode may allocate or deallocate VGPRs
+ using dedicated instructions, but must not send the DEALLOC_VGPRS
+ message.
+
=============== ============================ ==================================================
.. _amdgpu-target-id:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 1c8dc09d3060b..fdbabcb62c0bf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1251,6 +1251,12 @@ def FeatureXF32Insts : SubtargetFeature<"xf32-insts",
"v_mfma_f32_16x16x8_xf32 and v_mfma_f32_32x32x4_xf32"
>;
+def FeatureDynamicVGPR : SubtargetFeature <"dynamic-vgpr",
+ "DynamicVGPR",
+ "true",
+ "Enable dynamic VGPR mode"
+>;
+
// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
"FeatureDisable","true",
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 31e0bd8d652bc..13e61756e3036 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -1414,6 +1414,9 @@ static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD,
MD->setHwStage(CC, ".trap_present",
(bool)CurrentProgramInfo.TrapHandlerEnable);
MD->setHwStage(CC, ".excp_en", CurrentProgramInfo.EXCPEnable);
+
+ if (ST.isDynamicVGPREnabled())
+ MD->setComputeRegisters(".dynamic_vgpr_en", true);
}
MD->setHwStage(CC, ".lds_size",
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index f7f03fe5911bd..651dbad8244cb 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -191,6 +191,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// indicates a lack of S_CLAUSE support.
unsigned MaxHardClauseLength = 0;
bool SupportsSRAMECC = false;
+ bool DynamicVGPR = false;
// This should not be used directly. 'TargetID' tracks the dynamic settings
// for SRAMECC.
@@ -1653,6 +1654,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return true;
}
+ bool isDynamicVGPREnabled() const { return DynamicVGPR; }
+
bool requiresDisjointEarlyClobberAndUndef() const override {
// AMDGPU doesn't care if early-clobber and undef operands are allocated
// to the same register.
diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll
index 7536e83a9da6b..fa22089978c2e 100644
--- a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll
+++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll
@@ -1,4 +1,6 @@
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 <%s | FileCheck %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 <%s | FileCheck %s --check-prefixes=CHECK,GFX11
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 <%s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+dynamic-vgpr <%s | FileCheck %s --check-prefixes=CHECK,DVGPR
; CHECK-LABEL: {{^}}_amdgpu_cs_main:
; CHECK: ; TotalNumSgprs: 4
@@ -8,6 +10,7 @@
; CHECK-NEXT: amdpal.pipelines:
; CHECK-NEXT: - .api: Vulkan
; CHECK-NEXT: .compute_registers:
+; DVGPR-NEXT: .dynamic_vgpr_en: true
; CHECK-NEXT: .tg_size_en: true
; CHECK-NEXT: .tgid_x_en: false
; CHECK-NEXT: .tgid_y_en: false
@@ -57,7 +60,7 @@
; CHECK-NEXT: .entry_point_symbol: _amdgpu_cs_main
; CHECK-NEXT: .excp_en: 0
; CHECK-NEXT: .float_mode: 0xc0
-; CHECK-NEXT: .ieee_mode: false
+; GFX11-NEXT: .ieee_mode: false
; CHECK-NEXT: .image_op: false
; CHECK-NEXT: .lds_size: 0
; CHECK-NEXT: .mem_ordered: true
@@ -112,7 +115,7 @@
; CHECK-NEXT: .debug_mode: false
; CHECK-NEXT: .entry_point: _amdgpu_gs
; CHECK-NEXT: .entry_point_symbol: gs_shader
-; CHECK-NEXT: .ieee_mode: false
+; GFX11-NEXT: .ieee_mode: false
; CHECK-NEXT: .lds_size: 0x200
; CHECK-NEXT: .mem_ordered: true
; CHECK-NEXT: .scratch_en: false
@@ -124,7 +127,7 @@
; CHECK-NEXT: .debug_mode: false
; CHECK-NEXT: .entry_point: _amdgpu_hs
; CHECK-NEXT: .entry_point_symbol: hs_shader
-; CHECK-NEXT: .ieee_mode: false
+; GFX11-NEXT: .ieee_mode: false
; CHECK-NEXT: .lds_size: 0x1000
; CHECK-NEXT: .mem_ordered: true
; CHECK-NEXT: .scratch_en: false
@@ -136,7 +139,7 @@
; CHECK-NEXT: .debug_mode: false
; CHECK-NEXT: .entry_point: _amdgpu_ps
; CHECK-NEXT: .entry_point_symbol: ps_shader
-; CHECK-NEXT: .ieee_mode: false
+; GFX11-NEXT: .ieee_mode: false
; CHECK-NEXT: .lds_size: 0
; CHECK-NEXT: .mem_ordered: true
; CHECK-NEXT: .scratch_en: false
>From 392776287ec6e6247b02e8d5358c8b9c46ad0448 Mon Sep 17 00:00:00 2001
From: Diana Picus <diana-magda.picus at amd.com>
Date: Tue, 18 Mar 2025 11:06:01 +0100
Subject: [PATCH 2/2] Update test for non-entry func
---
.../AMDGPU/pal-metadata-3.0-callable.ll | 22 +++++++++++++------
1 file changed, 15 insertions(+), 7 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll
index 538ce15979de8..dd1e717c10486 100644
--- a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll
+++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll
@@ -1,10 +1,13 @@
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,GFX11 %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,GFX12 %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+dynamic-vgpr -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,GFX12,DVGPR %s
; CHECK: .amdgpu_pal_metadata
; CHECK-NEXT: ---
; CHECK-NEXT: amdpal.pipelines:
; CHECK-NEXT: - .api: Vulkan
; CHECK-NEXT: .compute_registers:
+; DVGPR-NEXT: .dynamic_vgpr_en: true
; CHECK-NEXT: .tg_size_en: true
; CHECK-NEXT: .tgid_x_en: false
; CHECK-NEXT: .tgid_y_en: false
@@ -16,7 +19,7 @@
; CHECK-NEXT: .debug_mode: 0
; CHECK-NEXT: .excp_en: 0
; CHECK-NEXT: .float_mode: 0xc0
-; CHECK-NEXT: .ieee_mode: true
+; GFX11-NEXT: .ieee_mode: true
; CHECK-NEXT: .image_op: false
; CHECK-NEXT: .lds_size: 0x200
; CHECK-NEXT: .mem_ordered: true
@@ -98,19 +101,22 @@
; CHECK-NEXT: no_stack_extern_call:
; CHECK-NEXT: .backend_stack_size: 0x10
; CHECK-NEXT: .lds_size: 0
-; CHECK-NEXT: .sgpr_count: 0x29
+; GFX11-NEXT: .sgpr_count: 0x29
+; GFX12-NEXT: .sgpr_count: 0x24
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x10
; CHECK-NEXT: .vgpr_count: 0x58
; CHECK-NEXT: no_stack_extern_call_many_args:
; CHECK-NEXT: .backend_stack_size: 0x90
; CHECK-NEXT: .lds_size: 0
-; CHECK-NEXT: .sgpr_count: 0x29
+; GFX11-NEXT: .sgpr_count: 0x29
+; GFX12-NEXT: .sgpr_count: 0x24
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x90
; CHECK-NEXT: .vgpr_count: 0x58
; CHECK-NEXT: no_stack_indirect_call:
; CHECK-NEXT: .backend_stack_size: 0x10
; CHECK-NEXT: .lds_size: 0
-; CHECK-NEXT: .sgpr_count: 0x29
+; GFX11-NEXT: .sgpr_count: 0x29
+; GFX12-NEXT: .sgpr_count: 0x24
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x10
; CHECK-NEXT: .vgpr_count: 0x58
; CHECK-NEXT: simple_lds:
@@ -140,13 +146,15 @@
; CHECK-NEXT: simple_stack_extern_call:
; CHECK-NEXT: .backend_stack_size: 0x20
; CHECK-NEXT: .lds_size: 0
-; CHECK-NEXT: .sgpr_count: 0x29
+; GFX11-NEXT: .sgpr_count: 0x29
+; GFX12-NEXT: .sgpr_count: 0x24
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x20
; CHECK-NEXT: .vgpr_count: 0x58
; CHECK-NEXT: simple_stack_indirect_call:
; CHECK-NEXT: .backend_stack_size: 0x20
; CHECK-NEXT: .lds_size: 0
-; CHECK-NEXT: .sgpr_count: 0x29
+; GFX11-NEXT: .sgpr_count: 0x29
+; GFX12-NEXT: .sgpr_count: 0x24
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x20
; CHECK-NEXT: .vgpr_count: 0x58
; CHECK-NEXT: simple_stack_recurse:
More information about the llvm-commits mailing list