[llvm] 6a8c205 - [AMDGPU] Avoid bundling a SCHED_BARRIER with memops (#153533)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 27 15:09:31 PDT 2025
Author: Yoonseo Choi
Date: 2025-08-27T17:09:27-05:00
New Revision: 6a8c20549e72e239fa39c77af72f719c6f7f0c6d
URL: https://github.com/llvm/llvm-project/commit/6a8c20549e72e239fa39c77af72f719c6f7f0c6d
DIFF: https://github.com/llvm/llvm-project/commit/6a8c20549e72e239fa39c77af72f719c6f7f0c6d.diff
LOG: [AMDGPU] Avoid bundling a SCHED_BARRIER with memops (#153533)
Avoid bundling a SCHED_BARRIER with memops. Memops can still be bundled,
but a SCHED_BARRIER ends the bundle and stays outside of it
(e.g. [load, load, ...] SCHED_BARRIER). This is to honor the
SCHED_BARRIERs maximally without interference from bundling.
If a SCHED_BARRIER is placed inside a bundle between memops, the
SCHED_BARRIER is not seen by the IGroupLP mutation in the post-RA
mi-sched phase. In addition, bundling memory ops around an in-between
SCHED_BARRIER can prevent that SCHED_BARRIER, or any neighboring
SCHED_BARRIER, from being honored. Since the user already placed a
SCHED_BARRIER between the memory ops, do not bundle it together with
them. Bypassing bundling in an MBB with a SCHED_BARRIER removes all of
these problems.
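For illustration, a minimal sketch of the resulting check, using simplified,
hypothetical helper names (isClauseCandidate, growClause); the actual logic
lives in SIPostRABundler::run in the diff below:

  // Sketch only: how clause collection can stop at a SCHED_BARRIER so the
  // barrier never ends up inside a bundle. Helper names are hypothetical.
  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineInstr.h"
  // AMDGPU::SCHED_BARRIER comes from the AMDGPU target's generated
  // instruction enum.
  using namespace llvm;

  static bool isClauseCandidate(const MachineInstr &MI); // hypothetical predicate

  static MachineBasicBlock::instr_iterator
  growClause(MachineBasicBlock::instr_iterator I,
             MachineBasicBlock::instr_iterator E) {
    for (; I != E; ++I) {
      if (isClauseCandidate(*I))
        continue; // memop joins the current clause
      // SCHED_BARRIER is a meta instruction, but it must not be swallowed by
      // the bundle: end the clause so the post-RA scheduler still sees it.
      if (!I->isMetaInstruction() || I->getOpcode() == AMDGPU::SCHED_BARRIER)
        break;
      // Other meta instructions (e.g. DBG_VALUE) may sit between clause
      // candidates, but a bundle never starts or ends on one.
    }
    return I; // first instruction past the clause
  }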
Added:
Modified:
llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp b/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
index efdc55b8e68be..5720b978aada0 100644
--- a/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
@@ -184,9 +184,11 @@ bool SIPostRABundler::run(MachineFunction &MF) {
if (I->getNumExplicitDefs() != 0)
Defs.insert(I->defs().begin()->getReg());
++ClauseLength;
- } else if (!I->isMetaInstruction()) {
- // Allow meta instructions in between bundle candidates, but do not
- // start or end a bundle on one.
+ } else if (!I->isMetaInstruction() ||
+ I->getOpcode() == AMDGPU::SCHED_BARRIER) {
+ // SCHED_BARRIER is not bundled so that the scheduler can honor it later.
+ // Allow other meta instructions in between bundle candidates, but do
+ // not start or end a bundle on one.
//
// TODO: It may be better to move meta instructions like dbg_value
// after the bundle. We're relying on the memory legalizer to unbundle
diff --git a/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir b/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir
index 1e6c00a54a012..d0d5cc11994af 100644
--- a/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir
+++ b/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir
@@ -351,3 +351,58 @@ body: |
$vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec
KILL killed $vgpr3_vgpr4, killed $vgpr5_vgpr6
...
+
+# Avoid bundling if an MBB has a SCHED_BARRIER
+---
+name: no_sched_barrier_within_bundle
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: no_sched_barrier_within_bundle
+ ; GCN: renamable $sgpr0_sgpr1 = IMPLICIT_DEF
+ ; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF
+ ; GCN-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
+ ; GCN-NEXT: renamable $vgpr1 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 0, 0, implicit $exec, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2
+ ; GCN-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 512, 0, implicit $exec, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0
+ ; GCN-NEXT: }
+ ; GCN-NEXT: renamable $sgpr2_sgpr3 = IMPLICIT_DEF
+ ; GCN-NEXT: renamable $vgpr10 = IMPLICIT_DEF
+ ; GCN-NEXT: renamable $vgpr1 = nsw V_MUL_LO_U32_e64 killed $vgpr1, $vgpr1, implicit $exec
+ ; GCN-NEXT: renamable $vgpr2 = nsw V_MUL_LO_U32_e64 killed $vgpr2, $vgpr2, implicit $exec
+ ; GCN-NEXT: SCHED_BARRIER 1924
+ ; GCN-NEXT: renamable $vgpr11 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr2_sgpr3, renamable $vgpr10, 0, 0, implicit $exec, implicit-def $vgpr11, implicit-def $vgpr11_lo16, implicit-def $vgpr11_hi16, implicit $sgpr2_sgpr3, implicit $vgpr10
+ ; GCN-NEXT: SCHED_BARRIER 1924
+ ; GCN-NEXT: renamable $vgpr12 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr2_sgpr3, renamable $vgpr10, 512, 0, implicit $exec, implicit-def $vgpr12, implicit-def $vgpr12_lo16, implicit-def $vgpr12_hi16, implicit $sgpr2_sgpr3, implicit $vgpr10
+ ; GCN-NEXT: renamable $sgpr4_sgpr5 = IMPLICIT_DEF
+ ; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF
+ ; GCN-NEXT: renamable $vgpr11 = nsw V_MUL_LO_U32_e64 killed $vgpr11, $vgpr11, implicit $exec
+ ; GCN-NEXT: renamable $vgpr12 = nsw V_MUL_LO_U32_e64 killed $vgpr12, $vgpr12, implicit $exec
+ ; GCN-NEXT: BUNDLE implicit killed $vgpr10, implicit killed $vgpr11, implicit killed $sgpr2_sgpr3, implicit $exec, implicit killed $vgpr12, implicit killed $vgpr0, implicit killed $vgpr1, implicit killed $sgpr4_sgpr5, implicit killed $vgpr2 {
+ ; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR renamable $vgpr10, killed renamable $vgpr11, renamable $sgpr2_sgpr3, 0, 0, implicit $exec, implicit killed $vgpr11
+ ; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr10, killed renamable $vgpr12, killed renamable $sgpr2_sgpr3, 512, 0, implicit $exec
+ ; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR renamable $vgpr0, killed renamable $vgpr1, renamable $sgpr4_sgpr5, 0, 0, implicit $exec
+ ; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr4_sgpr5, 512, 0, implicit $exec
+ ; GCN-NEXT: }
+ ; GCN-NEXT: S_ENDPGM 0
+ renamable $sgpr0_sgpr1 = IMPLICIT_DEF
+ renamable $vgpr0 = IMPLICIT_DEF
+ renamable $vgpr1 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 0, 0, implicit $exec, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2
+ renamable $vgpr2 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 512, 0, implicit $exec, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0
+ renamable $sgpr2_sgpr3 = IMPLICIT_DEF
+ renamable $vgpr10 = IMPLICIT_DEF
+ renamable $vgpr1 = nsw V_MUL_LO_U32_e64 killed $vgpr1, $vgpr1, implicit $exec
+ renamable $vgpr2 = nsw V_MUL_LO_U32_e64 killed $vgpr2, $vgpr2, implicit $exec
+ SCHED_BARRIER 1924
+ renamable $vgpr11 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr2_sgpr3, renamable $vgpr10, 0, 0, implicit $exec, implicit-def $vgpr11, implicit-def $vgpr11_lo16, implicit-def $vgpr11_hi16, implicit $sgpr2_sgpr3, implicit $vgpr10
+ SCHED_BARRIER 1924
+ renamable $vgpr12 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr2_sgpr3, renamable $vgpr10, 512, 0, implicit $exec, implicit-def $vgpr12, implicit-def $vgpr12_lo16, implicit-def $vgpr12_hi16, implicit $sgpr2_sgpr3, implicit $vgpr10
+ renamable $sgpr4_sgpr5 = IMPLICIT_DEF
+ renamable $vgpr0 = IMPLICIT_DEF
+ renamable $vgpr11 = nsw V_MUL_LO_U32_e64 killed $vgpr11, $vgpr11, implicit $exec
+ renamable $vgpr12 = nsw V_MUL_LO_U32_e64 killed $vgpr12, $vgpr12, implicit $exec
+ GLOBAL_STORE_DWORD_SADDR renamable $vgpr10, killed renamable $vgpr11, renamable $sgpr2_sgpr3, 0, 0, implicit $exec, implicit killed $vgpr11
+ GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr10, killed renamable $vgpr12, killed renamable $sgpr2_sgpr3, 512, 0, implicit $exec
+ GLOBAL_STORE_DWORD_SADDR renamable $vgpr0, killed renamable $vgpr1, renamable $sgpr4_sgpr5, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr4_sgpr5, 512, 0, implicit $exec
+ S_ENDPGM 0
+...