[llvm] [AMDGPU] Add target feature for waits before system scope stores. NFC. (PR #164993)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 24 09:23:07 PDT 2025
https://github.com/jayfoad updated https://github.com/llvm/llvm-project/pull/164993
>From 5184375a90f176eca23a1bfb50d9d98d81cf7149 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Fri, 24 Oct 2025 15:23:04 +0100
Subject: [PATCH 1/2] [AMDGPU] Add target feature for waits before system scope
stores. NFC.
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 9 +++-
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 5 ++
llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 3 +-
.../wait-before-stores-with-scope_sys.ll | 50 +++++++++++++++----
4 files changed, 54 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 1c8383c3a682d..f78a2c0029dc9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1466,6 +1466,12 @@ def FeatureClusters : SubtargetFeature< "clusters",
"Has clusters of workgroups support"
>;
+def FeatureWaitsBeforeSystemScopeStores
+ : SubtargetFeature<"waits-before-system-scope-stores",
+ "RequiresWaitsBeforeSystemScopeStores", "true",
+ "Target requires waits for loads and atomics before "
+ "system scope stores">;
+
// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
"FeatureDisable","true",
@@ -2060,7 +2066,8 @@ def FeatureISAVersion12 : FeatureSet<
FeatureMaxHardClauseLength32,
Feature1_5xVGPRs,
FeatureMemoryAtomicFAddF32DenormalSupport,
- FeatureBVHDualAndBVH8Insts
+ FeatureBVHDualAndBVH8Insts,
+ FeatureWaitsBeforeSystemScopeStores,
]>;
def FeatureISAVersion12_50 : FeatureSet<
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index ac660d5fada79..f377b8aaf1333 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -290,6 +290,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool Has45BitNumRecordsBufferResource = false;
bool HasClusters = false;
+ bool RequiresWaitsBeforeSystemScopeStores = false;
// Dummy feature to use for assembler in tablegen.
bool FeatureDisable = false;
@@ -1861,6 +1862,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool has45BitNumRecordsBufferResource() const {
return Has45BitNumRecordsBufferResource;
}
+
+ bool requiresWaitsBeforeSystemScopeStores() const {
+ return RequiresWaitsBeforeSystemScopeStores;
+ }
};
class GCNUserSGPRUsageInfo {
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 07264d973648f..d9f51d7b6592a 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -2673,7 +2673,8 @@ bool SIGfx12CacheControl::finalizeStore(MachineInstr &MI, bool Atomic) const {
const unsigned Scope = CPol->getImm() & CPol::SCOPE;
// GFX12.0 only: Extra waits needed before system scope stores.
- if (!ST.hasGFX1250Insts() && !Atomic && Scope == CPol::SCOPE_SYS)
+ if (ST.requiresWaitsBeforeSystemScopeStores() && !Atomic &&
+ Scope == CPol::SCOPE_SYS)
Changed |= insertWaitsBeforeSystemScopeStore(MI.getIterator());
return Changed;
diff --git a/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll b/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll
index 2d7a91f0cd114..985bcbd6ff4f4 100644
--- a/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll
+++ b/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll
@@ -1,22 +1,50 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX1200 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX1200 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-SDAG %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-GISEL %s
define amdgpu_ps void @intrinsic_store_system_scope(i32 %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
-; GFX12-LABEL: intrinsic_store_system_scope:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: buffer_store_b32 v0, v[1:2], s[0:3], s4 idxen offen scope:SCOPE_SYS
-; GFX12-NEXT: s_endpgm
+; GFX1200-LABEL: intrinsic_store_system_scope:
+; GFX1200: ; %bb.0:
+; GFX1200-NEXT: buffer_store_b32 v0, v[1:2], s[0:3], s4 idxen offen scope:SCOPE_SYS
+; GFX1200-NEXT: s_endpgm
+;
+; GFX1250-SDAG-LABEL: intrinsic_store_system_scope:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
+; GFX1250-SDAG-NEXT: buffer_store_b32 v0, v[2:3], s[0:3], s4 idxen offen scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: intrinsic_store_system_scope:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX1250-GISEL-NEXT: buffer_store_b32 v0, v[4:5], s[0:3], s4 idxen offen scope:SCOPE_SYS
+; GFX1250-GISEL-NEXT: s_endpgm
call void @llvm.amdgcn.struct.buffer.store.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 24)
ret void
}
define amdgpu_ps void @generic_store_volatile(i32 %val, ptr addrspace(1) %out) {
-; GFX12-LABEL: generic_store_volatile:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: global_store_b32 v[1:2], v0, off scope:SCOPE_SYS
-; GFX12-NEXT: s_wait_storecnt 0x0
-; GFX12-NEXT: s_endpgm
+; GFX1200-LABEL: generic_store_volatile:
+; GFX1200: ; %bb.0:
+; GFX1200-NEXT: global_store_b32 v[1:2], v0, off scope:SCOPE_SYS
+; GFX1200-NEXT: s_wait_storecnt 0x0
+; GFX1200-NEXT: s_endpgm
+;
+; GFX1250-SDAG-LABEL: generic_store_volatile:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
+; GFX1250-SDAG-NEXT: global_store_b32 v[2:3], v0, off scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: generic_store_volatile:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX1250-GISEL-NEXT: global_store_b32 v[4:5], v0, off scope:SCOPE_SYS
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
+; GFX1250-GISEL-NEXT: s_endpgm
store volatile i32 %val, ptr addrspace(1) %out
ret void
}
>From 474970d10a2db5095bd4293383f3fc282fce7173 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Fri, 24 Oct 2025 17:22:15 +0100
Subject: [PATCH 2/2] Manually reformat AMDGPU.td parts
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index f78a2c0029dc9..54d94b1f8682e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1466,11 +1466,12 @@ def FeatureClusters : SubtargetFeature< "clusters",
"Has clusters of workgroups support"
>;
-def FeatureWaitsBeforeSystemScopeStores
- : SubtargetFeature<"waits-before-system-scope-stores",
- "RequiresWaitsBeforeSystemScopeStores", "true",
- "Target requires waits for loads and atomics before "
- "system scope stores">;
+def FeatureWaitsBeforeSystemScopeStores : SubtargetFeature<
+ "waits-before-system-scope-stores",
+ "RequiresWaitsBeforeSystemScopeStores",
+ "true",
+ "Target requires waits for loads and atomics before system scope stores"
+>;
// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
More information about the llvm-commits
mailing list