[llvm] 6f7c77f - [AMDGPU] Check noalias.addrspace in mayAccessScratchThroughFlat (#151319)

via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 18 22:43:01 PDT 2025


Author: Pierre van Houtryve
Date: 2025-08-19T07:42:59+02:00
New Revision: 6f7c77fe900ec41b75ca1b9f43355ad8e3e170d7

URL: https://github.com/llvm/llvm-project/commit/6f7c77fe900ec41b75ca1b9f43355ad8e3e170d7
DIFF: https://github.com/llvm/llvm-project/commit/6f7c77fe900ec41b75ca1b9f43355ad8e3e170d7.diff

LOG: [AMDGPU] Check noalias.addrspace in mayAccessScratchThroughFlat (#151319)

PR #149247 made the MD accessible by the backend so we can now leverage
it in the memory model. The first use case here is detecting if a flat op
can access scratch memory.
Benefits both the MemoryLegalizer and InsertWaitCnt.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
    llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
    llvm/test/CodeGen/AMDGPU/gfx1250-scratch-scope-se.ll
    llvm/test/Transforms/AtomicExpand/AMDGPU/expand-cmpxchg-flat-maybe-private.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 072fb9cc547b0..a2084074263da 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -17793,23 +17793,9 @@ atomicSupportedIfLegalIntType(const AtomicRMWInst *RMW) {
 
 /// Return if a flat address space atomicrmw can access private memory.
 static bool flatInstrMayAccessPrivate(const Instruction *I) {
-  const MDNode *NoaliasAddrSpaceMD =
-      I->getMetadata(LLVMContext::MD_noalias_addrspace);
-  if (!NoaliasAddrSpaceMD)
-    return true;
-
-  for (unsigned I = 0, E = NoaliasAddrSpaceMD->getNumOperands() / 2; I != E;
-       ++I) {
-    auto *Low = mdconst::extract<ConstantInt>(
-        NoaliasAddrSpaceMD->getOperand(2 * I + 0));
-    if (Low->getValue().uge(AMDGPUAS::PRIVATE_ADDRESS)) {
-      auto *High = mdconst::extract<ConstantInt>(
-          NoaliasAddrSpaceMD->getOperand(2 * I + 1));
-      return High->getValue().ule(AMDGPUAS::PRIVATE_ADDRESS);
-    }
-  }
-
-  return true;
+  const MDNode *MD = I->getMetadata(LLVMContext::MD_noalias_addrspace);
+  return !MD ||
+         !AMDGPU::hasValueInRangeLikeMetadata(*MD, AMDGPUAS::PRIVATE_ADDRESS);
 }
 
 TargetLowering::AtomicExpansionKind

diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 10f29b3a4559b..3bbf27b95d798 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4267,12 +4267,15 @@ bool SIInstrInfo::mayAccessScratchThroughFlat(const MachineInstr &MI) const {
   if (MI.memoperands_empty())
     return true;
 
-  // TODO (?): Does this need to be taught how to read noalias.addrspace ?
-
   // See if any memory operand specifies an address space that involves scratch.
   return any_of(MI.memoperands(), [](const MachineMemOperand *Memop) {
     unsigned AS = Memop->getAddrSpace();
-    return AS == AMDGPUAS::PRIVATE_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS;
+    if (AS == AMDGPUAS::FLAT_ADDRESS) {
+      const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
+      return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
+                        *MD, AMDGPUAS::PRIVATE_ADDRESS);
+    }
+    return AS == AMDGPUAS::PRIVATE_ADDRESS;
   });
 }
 

diff  --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 42a64f0601cb8..6e4e087ed276e 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -21,6 +21,7 @@
 #include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/IntrinsicsR600.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
@@ -1677,6 +1678,29 @@ getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size) {
   return Vals;
 }
 
+bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val) {
+  assert((MD.getNumOperands() % 2 == 0) && "invalid number of operands!");
+  for (unsigned I = 0, E = MD.getNumOperands() / 2; I != E; ++I) {
+    auto Low =
+        mdconst::extract<ConstantInt>(MD.getOperand(2 * I + 0))->getValue();
+    auto High =
+        mdconst::extract<ConstantInt>(MD.getOperand(2 * I + 1))->getValue();
+    // There are two types of [A; B) ranges:
+    //  A < B, e.g. [4; 5) which is a range that only includes 4.
+    //  A > B, e.g. [5; 4) which is a range that wraps around and includes
+    //         everything except 4.
+    if (Low.ult(High)) {
+      if (Low.ule(Val) && High.ugt(Val))
+        return true;
+    } else {
+      if (Low.uge(Val) && High.ult(Val))
+        return true;
+    }
+  }
+
+  return false;
+}
+
 unsigned getVmcntBitMask(const IsaVersion &Version) {
   return (1 << (getVmcntBitWidthLo(Version.Major) +
                 getVmcntBitWidthHi(Version.Major))) -

diff  --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index befab68bb5698..70dfb63cbe040 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -35,6 +35,7 @@ class MCInstrInfo;
 class MCRegisterClass;
 class MCRegisterInfo;
 class MCSubtargetInfo;
+class MDNode;
 class StringRef;
 class Triple;
 class raw_ostream;
@@ -1064,6 +1065,9 @@ SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
 std::optional<SmallVector<unsigned>>
 getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size);
 
+/// Checks if \p Val is inside \p MD, a !range-like metadata.
+bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val);
+
 /// Represents the counter values to wait for in an s_waitcnt instruction.
 ///
 /// Large values (including the maximum possible integer) can be used to

diff  --git a/llvm/test/CodeGen/AMDGPU/gfx1250-scratch-scope-se.ll b/llvm/test/CodeGen/AMDGPU/gfx1250-scratch-scope-se.ll
index d1e82a06077f5..99025f0a983c0 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx1250-scratch-scope-se.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx1250-scratch-scope-se.ll
@@ -39,20 +39,19 @@ define void @test_flat_store_no_scratch_alloc(ptr %ptr, i32 %val) #0 {
     ret void
 }
 
-; TODO: handle
 define void @test_flat_store_noalias_addrspace(ptr %ptr, i32 %val) {
 ; GCN-LABEL: test_flat_store_noalias_addrspace:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_wait_loadcnt_dscnt 0x0
 ; GCN-NEXT:    s_wait_kmcnt 0x0
-; GCN-NEXT:    flat_store_b32 v[0:1], v2 scope:SCOPE_SE
+; GCN-NEXT:    flat_store_b32 v[0:1], v2
 ; GCN-NEXT:    s_wait_dscnt 0x0
 ; GCN-NEXT:    s_set_pc_i64 s[30:31]
     store i32 %val, ptr %ptr, !noalias.addrspace !{i32 5, i32 6}
     ret void
 }
 
-; TODO: would be nice to handle too
+; TODO: would be nice to handle
 define void @test_flat_store_select(ptr addrspace(1) %a, ptr addrspace(3) %b, i1 %cond, i32 %val) {
 ; GCN-SDAG-LABEL: test_flat_store_select:
 ; GCN-SDAG:       ; %bb.0:

diff  --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-cmpxchg-flat-maybe-private.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-cmpxchg-flat-maybe-private.ll
index 4b4994958e66a..a9e4f10dc9a53 100644
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-cmpxchg-flat-maybe-private.ll
+++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-cmpxchg-flat-maybe-private.ll
@@ -308,24 +308,7 @@ define { i64, i1 } @cmpxchg_flat_agent_i64__noalias_addrspace_edge_case0(ptr %pt
 define { i64, i1 } @cmpxchg_flat_agent_i64__no_2_6(ptr %ptr, i64 %val, i64 %swap) {
 ; CHECK-LABEL: define { i64, i1 } @cmpxchg_flat_agent_i64__no_2_6(
 ; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]], i64 [[SWAP:%.*]]) {
-; CHECK-NEXT:    [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[PTR]])
-; CHECK-NEXT:    br i1 [[IS_PRIVATE]], label %[[ATOMICRMW_PRIVATE:.*]], label %[[ATOMICRMW_GLOBAL:.*]]
-; CHECK:       [[ATOMICRMW_PRIVATE]]:
-; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5)
-; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[TMP2]], [[VAL]]
-; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i64 [[SWAP]], i64 [[TMP2]]
-; CHECK-NEXT:    store i64 [[TMP4]], ptr addrspace(5) [[TMP1]], align 8
-; CHECK-NEXT:    [[TMP5:%.*]] = insertvalue { i64, i1 } poison, i64 [[TMP2]], 0
-; CHECK-NEXT:    [[TMP6:%.*]] = insertvalue { i64, i1 } [[TMP5]], i1 [[TMP3]], 1
-; CHECK-NEXT:    br label %[[ATOMICRMW_PHI:.*]]
-; CHECK:       [[ATOMICRMW_GLOBAL]]:
-; CHECK-NEXT:    [[TMP7:%.*]] = cmpxchg ptr [[PTR]], i64 [[VAL]], i64 [[SWAP]] syncscope("agent") monotonic seq_cst, align 8, !noalias.addrspace [[META0]]
-; CHECK-NEXT:    br label %[[ATOMICRMW_PHI]]
-; CHECK:       [[ATOMICRMW_PHI]]:
-; CHECK-NEXT:    [[RESULT:%.*]] = phi { i64, i1 } [ [[TMP6]], %[[ATOMICRMW_PRIVATE]] ], [ [[TMP7]], %[[ATOMICRMW_GLOBAL]] ]
-; CHECK-NEXT:    br label %[[ATOMICRMW_END:.*]]
-; CHECK:       [[ATOMICRMW_END]]:
+; CHECK-NEXT:    [[RESULT:%.*]] = cmpxchg ptr [[PTR]], i64 [[VAL]], i64 [[SWAP]] syncscope("agent") monotonic seq_cst, align 8, !noalias.addrspace [[META4:![0-9]+]]
 ; CHECK-NEXT:    ret { i64, i1 } [[RESULT]]
 ;
   %result = cmpxchg ptr %ptr, i64 %val, i64 %swap syncscope("agent") monotonic seq_cst, !noalias.addrspace !7
@@ -335,7 +318,7 @@ define { i64, i1 } @cmpxchg_flat_agent_i64__no_2_6(ptr %ptr, i64 %val, i64 %swap
 define { i64, i1 } @cmpxchg_flat_agent_i64__no_2_3_5(ptr %ptr, i64 %val, i64 %swap) {
 ; CHECK-LABEL: define { i64, i1 } @cmpxchg_flat_agent_i64__no_2_3_5(
 ; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]], i64 [[SWAP:%.*]]) {
-; CHECK-NEXT:    [[RESULT:%.*]] = cmpxchg ptr [[PTR]], i64 [[VAL]], i64 [[SWAP]] syncscope("agent") monotonic seq_cst, align 8, !noalias.addrspace [[META4:![0-9]+]]
+; CHECK-NEXT:    [[RESULT:%.*]] = cmpxchg ptr [[PTR]], i64 [[VAL]], i64 [[SWAP]] syncscope("agent") monotonic seq_cst, align 8, !noalias.addrspace [[META5:![0-9]+]]
 ; CHECK-NEXT:    ret { i64, i1 } [[RESULT]]
 ;
   %result = cmpxchg ptr %ptr, i64 %val, i64 %swap syncscope("agent") monotonic seq_cst, !noalias.addrspace !8
@@ -357,5 +340,6 @@ define { i64, i1 } @cmpxchg_flat_agent_i64__no_2_3_5(ptr %ptr, i64 %val, i64 %sw
 ; CHECK: [[META1]] = !{[[META2:![0-9]+]], [[META3:![0-9]+]]}
 ; CHECK: [[META2]] = !{!"foo", !"bar"}
 ; CHECK: [[META3]] = !{!"bux", !"baz"}
-; CHECK: [[META4]] = !{i32 2, i32 4, i32 5, i32 6}
+; CHECK: [[META4]] = !{i32 2, i32 6}
+; CHECK: [[META5]] = !{i32 2, i32 4, i32 5, i32 6}
 ;.


        


More information about the llvm-commits mailing list