[llvm] [AMDGPU] Deallocate VGPRs before exiting in dynamic VGPR mode (PR #130037)
Diana Picus via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 18 04:28:03 PDT 2025
https://github.com/rovka updated https://github.com/llvm/llvm-project/pull/130037
>From 58c9daabc6dbe8bbb21050ee0ead663fbb54fd50 Mon Sep 17 00:00:00 2001
From: Diana Picus <Diana-Magda.Picus at amd.com>
Date: Mon, 23 Oct 2023 11:46:19 +0200
Subject: [PATCH 1/2] [AMDGPU] Deallocate VGPRs before exiting in dynamic VGPR
mode
In dynamic VGPR mode, waves must deallocate all VGPRs before exiting. If
the shader program does not do this, the hardware inserts an `S_ALLOC_VGPR 0`
before `S_ENDPGM`, but this may incur some performance cost. It is therefore
better if the compiler proactively generates that instruction.
This patch extends `si-insert-waitcnts` to deallocate the VGPRs by emitting
an `S_ALLOC_VGPR 0` before every `S_ENDPGM` when in dynamic VGPR mode.
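For illustration, a minimal before/after sketch of the intended effect. The
store instruction and its operands are made up for the example, and any wait
instructions the pass would also insert are omitted; only the placement of
`S_ALLOC_VGPR 0` reflects what the pass emits (see the new test below):

  ; before si-insert-waitcnts (dynamic VGPR mode)
  bb.0:
    GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec
    S_ENDPGM 0

  ; after si-insert-waitcnts
  bb.0:
    GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec
    S_ALLOC_VGPR 0   ; deallocate all VGPRs before the wave exits
    S_ENDPGM 0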
---
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 60 +--
.../CodeGen/AMDGPU/release-vgprs-gfx12.mir | 356 ++++++++++++++++++
2 files changed, 393 insertions(+), 23 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/release-vgprs-gfx12.mir
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 239f2664f59f3..fab4107a127d0 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1640,17 +1640,21 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
(MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) {
Wait = Wait.combined(WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/false));
}
- // Identify S_ENDPGM instructions which may have to wait for outstanding VMEM
- // stores. In this case it can be useful to send a message to explicitly
- // release all VGPRs before the stores have completed, but it is only safe to
- // do this if:
- // * there are no outstanding scratch stores
- // * we are not in Dynamic VGPR mode
+ // In dynamic VGPR mode, we want to release the VGPRs before the wave exits.
+ // Technically the hardware will do this on its own if we don't, but that
+ // might cost extra cycles compared to doing it explicitly.
+ // When not in dynamic VGPR mode, identify S_ENDPGM instructions which may
+ // have to wait for outstanding VMEM stores. In this case it can be useful to
+ // send a message to explicitly release all VGPRs before the stores have
+ // completed, but it is only safe to do this if there are no outstanding
+ // scratch stores.
else if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED) {
- if (ST->getGeneration() >= AMDGPUSubtarget::GFX11 && !WCG->isOptNone() &&
- ScoreBrackets.getScoreRange(STORE_CNT) != 0 &&
- !ScoreBrackets.hasPendingEvent(SCRATCH_WRITE_ACCESS))
+ if (!WCG->isOptNone() &&
+ (ST->isDynamicVGPREnabled() ||
+ (ST->getGeneration() >= AMDGPUSubtarget::GFX11 &&
+ ScoreBrackets.getScoreRange(STORE_CNT) != 0 &&
+ !ScoreBrackets.hasPendingEvent(SCRATCH_WRITE_ACCESS))))
ReleaseVGPRInsts.insert(&MI);
}
// Resolve vm waits before gs-done.
@@ -2593,26 +2597,36 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
}
}
- // Insert DEALLOC_VGPR messages before previously identified S_ENDPGM
- // instructions.
+ // Deallocate the VGPRs before previously identified S_ENDPGM instructions.
+ // This is done in different ways depending on how the VGPRs were allocated
+ // (i.e. whether we're in dynamic VGPR mode or not).
// Skip deallocation if kernel is waveslot limited vs VGPR limited. A short
// waveslot limited kernel runs slower with the deallocation.
- if (!ReleaseVGPRInsts.empty() &&
- (MF.getFrameInfo().hasCalls() ||
- ST->getOccupancyWithNumVGPRs(
- TRI->getNumUsedPhysRegs(*MRI, AMDGPU::VGPR_32RegClass)) <
- AMDGPU::IsaInfo::getMaxWavesPerEU(ST))) {
+ if (ST->isDynamicVGPREnabled()) {
for (MachineInstr *MI : ReleaseVGPRInsts) {
- if (ST->requiresNopBeforeDeallocVGPRs()) {
- BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
- TII->get(AMDGPU::S_NOP))
- .addImm(0);
- }
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
- TII->get(AMDGPU::S_SENDMSG))
- .addImm(AMDGPU::SendMsg::ID_DEALLOC_VGPRS_GFX11Plus);
+ TII->get(AMDGPU::S_ALLOC_VGPR))
+ .addImm(0);
Modified = true;
}
+ } else {
+ if (!ReleaseVGPRInsts.empty() &&
+ (MF.getFrameInfo().hasCalls() ||
+ ST->getOccupancyWithNumVGPRs(
+ TRI->getNumUsedPhysRegs(*MRI, AMDGPU::VGPR_32RegClass)) <
+ AMDGPU::IsaInfo::getMaxWavesPerEU(ST))) {
+ for (MachineInstr *MI : ReleaseVGPRInsts) {
+ if (ST->requiresNopBeforeDeallocVGPRs()) {
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(AMDGPU::S_NOP))
+ .addImm(0);
+ }
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(AMDGPU::S_SENDMSG))
+ .addImm(AMDGPU::SendMsg::ID_DEALLOC_VGPRS_GFX11Plus);
+ Modified = true;
+ }
+ }
}
ReleaseVGPRInsts.clear();
PreheadersToFlush.clear();
diff --git a/llvm/test/CodeGen/AMDGPU/release-vgprs-gfx12.mir b/llvm/test/CodeGen/AMDGPU/release-vgprs-gfx12.mir
new file mode 100644
index 0000000000000..884b5f8b6f018
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/release-vgprs-gfx12.mir
@@ -0,0 +1,356 @@
+# RUN: llc -O2 -march=amdgcn -mcpu=gfx1200 -run-pass=si-insert-waitcnts -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=CHECK,DEFAULT
+# RUN: llc -O2 -march=amdgcn -mcpu=gfx1200 -mattr=+dynamic-vgpr -run-pass=si-insert-waitcnts -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=CHECK,DVGPR
+
+--- |
+ define amdgpu_ps void @tbuffer_store1() { ret void }
+ define amdgpu_ps void @tbuffer_store2() { ret void }
+ define amdgpu_ps void @flat_store() { ret void }
+ define amdgpu_ps void @global_store() { ret void }
+ define amdgpu_ps void @buffer_store_format() { ret void }
+ define amdgpu_ps void @ds_write_b32() { ret void }
+ define amdgpu_ps void @global_store_dword() { ret void }
+ define amdgpu_ps void @multiple_basic_blocks1() { ret void }
+ define amdgpu_ps void @multiple_basic_blocks2() { ret void }
+ define amdgpu_ps void @multiple_basic_blocks3() { ret void }
+ define amdgpu_ps void @recursive_loop() { ret void }
+ define amdgpu_ps void @recursive_loop_vmem() { ret void }
+ define amdgpu_ps void @image_store() { ret void }
+ define amdgpu_ps void @scratch_store() { ret void }
+ define amdgpu_ps void @buffer_atomic() { ret void }
+ define amdgpu_ps void @flat_atomic() { ret void }
+ define amdgpu_ps void @global_atomic() { ret void }
+ define amdgpu_ps void @image_atomic() { ret void }
+ define amdgpu_ps void @global_store_optnone() noinline optnone { ret void }
+...
+
+---
+name: tbuffer_store1
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: tbuffer_store1
+ ; CHECK-NOT: S_SENDMSG 3
+ ; DEFAULT-NOT: S_ALLOC_VGPR
+ ; DVGPR: S_ALLOC_VGPR 0
+ ; CHECK: S_ENDPGM 0
+ TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: tbuffer_store2
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: tbuffer_store2
+ ; CHECK-NOT: S_SENDMSG 3
+ ; DEFAULT-NOT: S_ALLOC_VGPR
+ ; DVGPR: S_ALLOC_VGPR 0
+ ; CHECK: S_ENDPGM 0
+ TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7)
+ S_ENDPGM 0
+...
+
+---
+name: flat_store
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: flat_store
+ ; CHECK-NOT: S_SENDMSG 3
+ ; DEFAULT-NOT: S_ALLOC_VGPR
+ ; DVGPR: S_ALLOC_VGPR 0
+ ; CHECK: S_ENDPGM 0
+ FLAT_STORE_DWORDX4 $vgpr49_vgpr50, $vgpr26_vgpr27_vgpr28_vgpr29, 0, 0, implicit $exec, implicit $flat_scr
+ S_ENDPGM 0
+...
+
+---
+name: global_store
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: global_store
+ ; CHECK-NOT: S_SENDMSG 3
+ ; DEFAULT-NOT: S_ALLOC_VGPR
+ ; DVGPR: S_ALLOC_VGPR 0
+ ; CHECK: S_ENDPGM 0
+ GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec
+ S_WAIT_STORECNT 0
+ S_ENDPGM 0
+...
+
+---
+name: buffer_store_format
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: buffer_store_format
+ ; CHECK-NOT: S_SENDMSG 3
+ ; DEFAULT-NOT: S_ALLOC_VGPR
+ ; DVGPR: S_ALLOC_VGPR 0
+ ; CHECK: S_ENDPGM 0
+ BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: ds_write_b32
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: ds_write_b32
+ ; CHECK-NOT: S_SENDMSG 3
+ ; DEFAULT-NOT: S_ALLOC_VGPR
+ ; DVGPR: S_ALLOC_VGPR 0
+ ; CHECK: S_ENDPGM 0
+ renamable $vgpr0 = IMPLICIT_DEF
+ renamable $vgpr1 = IMPLICIT_DEF
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 12, 0, implicit $exec, implicit $m0
+ S_ENDPGM 0
+
+...
+---
+name: global_store_dword
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr0_sgpr1
+
+ ; CHECK-LABEL: name: global_store_dword
+ ; CHECK-NOT: S_SENDMSG 3
+ ; DEFAULT-NOT: S_ALLOC_VGPR
+ ; DVGPR: S_ALLOC_VGPR 0
+ ; CHECK: S_ENDPGM 0
+ renamable $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr1, killed $vgpr0, killed $sgpr2, 0, implicit $exec
+ GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr2, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1, 0, 0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: multiple_basic_blocks1
+body: |
+ ; CHECK-LABEL: name: multiple_basic_blocks1
+ ; CHECK-NOT: S_SENDMSG 3
+ ; DEFAULT-NOT: S_ALLOC_VGPR
+ ; DVGPR: S_ALLOC_VGPR 0
+ ; CHECK: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1
+
+ renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.1, %bb.2
+
+ $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec
+ S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.1, implicit killed $scc
+ S_BRANCH %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+
+...
+
+---
+name: multiple_basic_blocks2
+body: |
+ ; CHECK-LABEL: name: multiple_basic_blocks2
+ ; CHECK: bb.2:
+ ; CHECK-NOT: S_SENDMSG 3
+ ; DEFAULT-NOT: S_ALLOC_VGPR
+ ; DVGPR: S_ALLOC_VGPR 0
+ ; CHECK: S_ENDPGM 0
+ bb.0:
+ successors: %bb.2
+
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.1:
+ successors: %bb.2
+
+ $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: multiple_basic_blocks3
+body: |
+ ; CHECK-LABEL: name: multiple_basic_blocks3
+ ; CHECK: bb.4:
+ ; CHECK-NOT: S_SENDMSG 3
+ ; DEFAULT-NOT: S_ALLOC_VGPR
+ ; DVGPR: S_ALLOC_VGPR 0
+ ; CHECK: S_ENDPGM 0
+ bb.0:
+ successors: %bb.2
+
+ $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.1:
+ successors: %bb.2
+
+ $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.2:
+ successors: %bb.4
+
+ S_BRANCH %bb.4
+
+ bb.3:
+ successors: %bb.4
+
+ $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.4:
+ S_ENDPGM 0
+...
+
+---
+name: recursive_loop
+body: |
+ ; CHECK-LABEL: name: recursive_loop
+ ; CHECK-NOT: S_SENDMSG 3
+ ; DEFAULT-NOT: S_ALLOC_VGPR
+ ; DVGPR: S_ALLOC_VGPR 0
+ ; CHECK: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1
+
+ renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.1, %bb.2
+
+ S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.1, implicit killed $scc
+ S_BRANCH %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: recursive_loop_vmem
+body: |
+ ; CHECK-LABEL: name: recursive_loop_vmem
+ ; CHECK-NOT: S_SENDMSG 3
+ ; DEFAULT-NOT: S_ALLOC_VGPR
+ ; DVGPR: S_ALLOC_VGPR 0
+ ; CHECK: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1
+
+ renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.1, %bb.2
+
+ TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec
+ S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.1, implicit killed $scc
+ S_BRANCH %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: image_store
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: image_store
+ ; CHECK-NOT: S_SENDMSG 3
+ ; DEFAULT-NOT: S_ALLOC_VGPR
+ ; DVGPR: S_ALLOC_VGPR 0
+ ; CHECK: S_ENDPGM 0
+ IMAGE_STORE_V2_V1_gfx11 killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 12, 0, 1, 0, 0, -1, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), addrspace 7)
+ S_ENDPGM 0
+...
+
+---
+name: scratch_store
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: scratch_store
+ ; CHECK-NOT: S_SENDMSG 3
+ ; DEFAULT-NOT: S_ALLOC_VGPR
+ ; DVGPR: S_ALLOC_VGPR 0
+ ; CHECK: S_ENDPGM 0
+ renamable $sgpr0 = S_AND_B32 killed renamable $sgpr0, -16, implicit-def dead $scc
+ SCRATCH_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $sgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ S_ENDPGM 0
+...
+
+---
+name: buffer_atomic
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: buffer_atomic
+ ; CHECK-NOT: S_SENDMSG 3
+ ; DEFAULT-NOT: S_ALLOC_VGPR
+ ; DVGPR: S_ALLOC_VGPR 0
+ ; CHECK: S_ENDPGM 0
+ BUFFER_ATOMIC_ADD_F32_OFFEN killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7)
+ S_ENDPGM 0
+...
+
+---
+name: flat_atomic
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: flat_atomic
+ ; CHECK-NOT: S_SENDMSG 3
+ ; DEFAULT-NOT: S_ALLOC_VGPR
+ ; DVGPR: S_ALLOC_VGPR 0
+ ; CHECK: S_ENDPGM 0
+ renamable $vgpr0_vgpr1 = FLAT_ATOMIC_DEC_X2_RTN killed renamable $vgpr0_vgpr1, killed renamable $vgpr2_vgpr3, 40, 1, implicit $exec, implicit $flat_scr
+ S_ENDPGM 0
+...
+
+
+---
+name: global_atomic
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: global_atomic
+ ; CHECK-NOT: S_SENDMSG 3
+ ; DEFAULT-NOT: S_ALLOC_VGPR
+ ; DVGPR: S_ALLOC_VGPR 0
+ ; CHECK: S_ENDPGM 0
+ renamable $vgpr0_vgpr1 = GLOBAL_ATOMIC_INC_X2_SADDR_RTN killed renamable $vgpr0, killed renamable $vgpr1_vgpr2, killed renamable $sgpr0_sgpr1, 40, 1, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: image_atomic
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: image_atomic
+ ; CHECK-NOT: S_SENDMSG 3
+ ; DEFAULT-NOT: S_ALLOC_VGPR
+ ; DVGPR: S_ALLOC_VGPR 0
+ ; CHECK: S_ENDPGM 0
+ renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7)
+ S_ENDPGM 0
+...
+
+---
+name: global_store_optnone
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: global_store_optnone
+ ; CHECK-NOT: S_SENDMSG 3
+ ; CHECK-NOT: S_ALLOC_VGPR
+ ; CHECK: S_ENDPGM 0
+ GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec
+ S_WAIT_STORECNT 0
+ S_ENDPGM 0
+...
>From 6b7d1740c5c794040999cb36d62be565aebb5b91 Mon Sep 17 00:00:00 2001
From: Diana Picus <diana-magda.picus at amd.com>
Date: Thu, 6 Mar 2025 13:49:46 +0100
Subject: [PATCH 2/2] Remove useless -O2 from test
---
llvm/test/CodeGen/AMDGPU/release-vgprs-gfx12.mir | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/release-vgprs-gfx12.mir b/llvm/test/CodeGen/AMDGPU/release-vgprs-gfx12.mir
index 884b5f8b6f018..d465bf95fbfbe 100644
--- a/llvm/test/CodeGen/AMDGPU/release-vgprs-gfx12.mir
+++ b/llvm/test/CodeGen/AMDGPU/release-vgprs-gfx12.mir
@@ -1,5 +1,5 @@
-# RUN: llc -O2 -march=amdgcn -mcpu=gfx1200 -run-pass=si-insert-waitcnts -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=CHECK,DEFAULT
-# RUN: llc -O2 -march=amdgcn -mcpu=gfx1200 -mattr=+dynamic-vgpr -run-pass=si-insert-waitcnts -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=CHECK,DVGPR
+# RUN: llc -march=amdgcn -mcpu=gfx1200 -run-pass=si-insert-waitcnts -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=CHECK,DEFAULT
+# RUN: llc -march=amdgcn -mcpu=gfx1200 -mattr=+dynamic-vgpr -run-pass=si-insert-waitcnts -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=CHECK,DVGPR
--- |
define amdgpu_ps void @tbuffer_store1() { ret void }