[llvm] [AMDGPU] Add IntrArgMemOnly, WriteOnly on LDS Ptr for raw.buffer.load.lds and struct.buffer.load.lds (PR #154306)

via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 19 03:33:25 PDT 2025


https://github.com/choikwa created https://github.com/llvm/llvm-project/pull/154306

Add IntrArgMemOnly, and WriteOnly on the LDS pointer argument, to the raw.buffer.load.lds and struct.buffer.load.lds intrinsics.

Update testcase to look for correct intrinsic decls.

Diff from before:
<img width="1913" height="980" alt="{0F132745-23C5-4359-BF6B-BA56E68155DE}" src="https://github.com/user-attachments/assets/68097a97-e4ce-406f-a0d9-d73edbb08d54" />


From 7e036da089d800c91094264eb200f4d41ea0c523 Mon Sep 17 00:00:00 2001
From: Kevin Choi <kevin.choi at amd.com>
Date: Tue, 19 Aug 2025 02:24:58 -0500
Subject: [PATCH] [AMDGPU] Add IntrArgMemOnly, WriteOnly on LDS Ptr for
 raw.buffer.load.lds and struct.buffer.load.lds

Update testcase to look for correct intrinsic decls.
---
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td      |  8 ++++--
 .../Inline/AMDGPU/load-intrinsics.ll          | 28 +++++++++++++------
 2 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index abd83c7c4d4a7..646c6c86523b1 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1991,7 +1991,9 @@ class AMDGPURawBufferLoadLDS : Intrinsic <
                               //        gfx12+: bits [0-2] = th, bits [3-4] = scope,
                               //                bit 6 = swz
                               //           all: volatile op (bit 31, stripped at lowering)
-  [IntrWillReturn, NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>,
+  [IntrWillReturn, IntrArgMemOnly,
+   WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>,
+   ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>,
    ImmArg<ArgIndex<6>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>;
 def int_amdgcn_raw_buffer_load_lds : AMDGPURawBufferLoadLDS;
 
@@ -2035,7 +2037,9 @@ class AMDGPUStructBufferLoadLDS : Intrinsic <
                               //        gfx12+: bits [0-2] = th, bits [3-4] = scope,
                               //                bit 6 = swz
                               //           all: volatile op (bit 31, stripped at lowering)
-  [IntrWillReturn, NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<6>>,
+  [IntrWillReturn, IntrArgMemOnly,
+   WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>,
+   ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<6>>,
    ImmArg<ArgIndex<7>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>;
 def int_amdgcn_struct_buffer_load_lds : AMDGPUStructBufferLoadLDS;
 
diff --git a/llvm/test/Transforms/Inline/AMDGPU/load-intrinsics.ll b/llvm/test/Transforms/Inline/AMDGPU/load-intrinsics.ll
index 46f53d8f82cfd..10682566ef034 100644
--- a/llvm/test/Transforms/Inline/AMDGPU/load-intrinsics.ll
+++ b/llvm/test/Transforms/Inline/AMDGPU/load-intrinsics.ll
@@ -3,44 +3,54 @@
 
 ; This test tests if the load intrinsic gets correct memory(argmem: read) attribute and
 ; the call instruction is assigned correct !alias.scope metadata post inlining
-
-define void @caller(ptr addrspace(3) %addr_f, ptr addrspace(1) %use_f) {
+define void @caller(ptr addrspace(3) %addr_f, ptr addrspace(1) %use_f, <4 x i32> %rsrc_f) {
 ; OPT-LABEL: define void @caller(
-; OPT-SAME: ptr addrspace(3) [[ADDR_F:%.*]], ptr addrspace(1) [[USE_F:%.*]]) {
+; OPT-SAME: ptr addrspace(3) [[ADDR_F:%.*]], ptr addrspace(1) [[USE_F:%.*]], <4 x i32> [[RSRC_F:%.*]]) {
 ; OPT-NEXT:  [[ENTRY:.*:]]
 ; OPT-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]])
 ; OPT-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
 ; OPT-NEXT:    [[GEP_I:%.*]] = getelementptr i64, ptr addrspace(3) [[ADDR_F]], i32 4
 ; OPT-NEXT:    [[VAL_I:%.*]] = call <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3) [[GEP_I]]), !alias.scope [[META0]], !noalias [[META3]]
 ; OPT-NEXT:    store <2 x i32> [[VAL_I]], ptr addrspace(1) [[USE_F]], align 8, !alias.scope [[META3]], !noalias [[META0]]
+; OPT-NEXT:    call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> [[RSRC_F]], ptr addrspace(3) [[GEP_I]], i32 16, i32 16, i32 16, i32 0, i32 11), !alias.scope [[META0]], !noalias [[META3]]
+; OPT-NEXT:    call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> [[RSRC_F]], ptr addrspace(3) [[GEP_I]], i32 4, i32 8, i32 0, i32 0, i32 0, i32 0), !alias.scope [[META0]], !noalias [[META3]]
 ; OPT-NEXT:    ret void
 ;
 entry:
-  call void @callee(ptr addrspace(3) %addr_f, ptr addrspace(1) %use_f)
+  call void @callee(ptr addrspace(3) %addr_f, ptr addrspace(1) %use_f, <4 x i32> %rsrc_f)
   ret void
 }
 
-define void @callee(ptr addrspace(3) noalias %addr, ptr addrspace(1) noalias %use) {
+define void @callee(ptr addrspace(3) noalias %addr, ptr addrspace(1) noalias %use, <4 x i32> %rsrc) {
 ; OPT-LABEL: define void @callee(
-; OPT-SAME: ptr addrspace(3) noalias [[ADDR:%.*]], ptr addrspace(1) noalias [[USE:%.*]]) {
+; OPT-SAME: ptr addrspace(3) noalias [[ADDR:%.*]], ptr addrspace(1) noalias [[USE:%.*]], <4 x i32> [[RSRC:%.*]]) {
 ; OPT-NEXT:  [[ENTRY:.*:]]
 ; OPT-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr addrspace(3) [[ADDR]], i32 4
 ; OPT-NEXT:    [[VAL:%.*]] = call <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3) [[GEP]])
 ; OPT-NEXT:    store <2 x i32> [[VAL]], ptr addrspace(1) [[USE]], align 8
+; OPT-NEXT:    call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> [[RSRC]], ptr addrspace(3) [[GEP]], i32 16, i32 16, i32 16, i32 0, i32 11)
+; OPT-NEXT:    call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> [[RSRC]], ptr addrspace(3) [[GEP]], i32 4, i32 8, i32 0, i32 0, i32 0, i32 0)
 ; OPT-NEXT:    ret void
 ;
 entry:
   %gep = getelementptr i64, ptr addrspace(3) %addr, i32 4
-  %val = call <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32.p3(ptr addrspace(3) %gep)
+  %val = call <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3) %gep)
   store <2 x i32> %val, ptr addrspace(1) %use
+  call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) %gep, i32 16, i32 16, i32 16, i32 0, i32 11)
+  call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) %gep, i32 4, i32 8, i32 0, i32 0, i32 0, i32 0)
   ret void
 }
-;.
 ; Check Function Attribute on decl
 ; OPT: declare <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3) captures(none)) #[[ATTR0:[0-9]+]]
+; OPT: declare void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32>, ptr addrspace(3) writeonly captures(none), i32 immarg, i32, i32, i32 immarg, i32 immarg) #[[ATTR1:[0-9]+]]
+; OPT: declare void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32>, ptr addrspace(3) writeonly captures(none), i32 immarg, i32, i32, i32, i32 immarg, i32 immarg) #[[ATTR1]]
 declare <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3))
+declare void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32>, ptr addrspace(3), i32, i32, i32, i32, i32)
+declare void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32>, ptr addrspace(3), i32, i32, i32, i32, i32, i32)
+;.
 ; OPT: attributes #[[ATTR0]] = { convergent nocallback nofree nounwind willreturn memory(argmem: read) }
-; OPT: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+; OPT: attributes #[[ATTR1]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+; OPT: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
 ;.
 ; OPT: [[META0]] = !{[[META1:![0-9]+]]}
 ; OPT: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]], !"callee: %addr"}



More information about the llvm-commits mailing list