[llvm] [AMDGPU] Add IntrArgMemOnly, WriteOnly on LDS Ptr for raw.buffer.load… (PR #154306)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 19 03:33:25 PDT 2025
https://github.com/choikwa created https://github.com/llvm/llvm-project/pull/154306
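Add IntrArgMemOnly, and WriteOnly on the LDS pointer argument, to the raw.buffer.load.lds and struct.buffer.load.lds intrinsics.
Update the test case to check for the correct intrinsic declarations.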
Diff from before:
<img width="1913" height="980" alt="{0F132745-23C5-4359-BF6B-BA56E68155DE}" src="https://github.com/user-attachments/assets/68097a97-e4ce-406f-a0d9-d73edbb08d54" />
From 7e036da089d800c91094264eb200f4d41ea0c523 Mon Sep 17 00:00:00 2001
From: Kevin Choi <kevin.choi at amd.com>
Date: Tue, 19 Aug 2025 02:24:58 -0500
Subject: [PATCH] [AMDGPU] Add IntrArgMemOnly, WriteOnly on LDS Ptr for
raw.buffer.load.lds and struct.buffer.load.lds
Update testcase to look for correct intrinsic decls.
---
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 8 ++++--
.../Inline/AMDGPU/load-intrinsics.ll | 28 +++++++++++++------
2 files changed, 25 insertions(+), 11 deletions(-)
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index abd83c7c4d4a7..646c6c86523b1 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1991,7 +1991,9 @@ class AMDGPURawBufferLoadLDS : Intrinsic <
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
- [IntrWillReturn, NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>,
+ [IntrWillReturn, IntrArgMemOnly,
+ WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>,
+ ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>,
ImmArg<ArgIndex<6>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>;
def int_amdgcn_raw_buffer_load_lds : AMDGPURawBufferLoadLDS;
@@ -2035,7 +2037,9 @@ class AMDGPUStructBufferLoadLDS : Intrinsic <
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
- [IntrWillReturn, NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<6>>,
+ [IntrWillReturn, IntrArgMemOnly,
+ WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>,
+ ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<6>>,
ImmArg<ArgIndex<7>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>;
def int_amdgcn_struct_buffer_load_lds : AMDGPUStructBufferLoadLDS;
diff --git a/llvm/test/Transforms/Inline/AMDGPU/load-intrinsics.ll b/llvm/test/Transforms/Inline/AMDGPU/load-intrinsics.ll
index 46f53d8f82cfd..10682566ef034 100644
--- a/llvm/test/Transforms/Inline/AMDGPU/load-intrinsics.ll
+++ b/llvm/test/Transforms/Inline/AMDGPU/load-intrinsics.ll
@@ -3,44 +3,54 @@
; This test tests if the load intrinsic gets correct memory(argmem: read) attribute and
; the call instruction is assigned correct !alias.scope metadata post inlining
-
-define void @caller(ptr addrspace(3) %addr_f, ptr addrspace(1) %use_f) {
+define void @caller(ptr addrspace(3) %addr_f, ptr addrspace(1) %use_f, <4 x i32> %rsrc_f) {
; OPT-LABEL: define void @caller(
-; OPT-SAME: ptr addrspace(3) [[ADDR_F:%.*]], ptr addrspace(1) [[USE_F:%.*]]) {
+; OPT-SAME: ptr addrspace(3) [[ADDR_F:%.*]], ptr addrspace(1) [[USE_F:%.*]], <4 x i32> [[RSRC_F:%.*]]) {
; OPT-NEXT: [[ENTRY:.*:]]
; OPT-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]])
; OPT-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
; OPT-NEXT: [[GEP_I:%.*]] = getelementptr i64, ptr addrspace(3) [[ADDR_F]], i32 4
; OPT-NEXT: [[VAL_I:%.*]] = call <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3) [[GEP_I]]), !alias.scope [[META0]], !noalias [[META3]]
; OPT-NEXT: store <2 x i32> [[VAL_I]], ptr addrspace(1) [[USE_F]], align 8, !alias.scope [[META3]], !noalias [[META0]]
+; OPT-NEXT: call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> [[RSRC_F]], ptr addrspace(3) [[GEP_I]], i32 16, i32 16, i32 16, i32 0, i32 11), !alias.scope [[META0]], !noalias [[META3]]
+; OPT-NEXT: call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> [[RSRC_F]], ptr addrspace(3) [[GEP_I]], i32 4, i32 8, i32 0, i32 0, i32 0, i32 0), !alias.scope [[META0]], !noalias [[META3]]
; OPT-NEXT: ret void
;
entry:
- call void @callee(ptr addrspace(3) %addr_f, ptr addrspace(1) %use_f)
+ call void @callee(ptr addrspace(3) %addr_f, ptr addrspace(1) %use_f, <4 x i32> %rsrc_f)
ret void
}
-define void @callee(ptr addrspace(3) noalias %addr, ptr addrspace(1) noalias %use) {
+define void @callee(ptr addrspace(3) noalias %addr, ptr addrspace(1) noalias %use, <4 x i32> %rsrc) {
; OPT-LABEL: define void @callee(
-; OPT-SAME: ptr addrspace(3) noalias [[ADDR:%.*]], ptr addrspace(1) noalias [[USE:%.*]]) {
+; OPT-SAME: ptr addrspace(3) noalias [[ADDR:%.*]], ptr addrspace(1) noalias [[USE:%.*]], <4 x i32> [[RSRC:%.*]]) {
; OPT-NEXT: [[ENTRY:.*:]]
; OPT-NEXT: [[GEP:%.*]] = getelementptr i64, ptr addrspace(3) [[ADDR]], i32 4
; OPT-NEXT: [[VAL:%.*]] = call <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3) [[GEP]])
; OPT-NEXT: store <2 x i32> [[VAL]], ptr addrspace(1) [[USE]], align 8
+; OPT-NEXT: call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> [[RSRC]], ptr addrspace(3) [[GEP]], i32 16, i32 16, i32 16, i32 0, i32 11)
+; OPT-NEXT: call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> [[RSRC]], ptr addrspace(3) [[GEP]], i32 4, i32 8, i32 0, i32 0, i32 0, i32 0)
; OPT-NEXT: ret void
;
entry:
%gep = getelementptr i64, ptr addrspace(3) %addr, i32 4
- %val = call <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32.p3(ptr addrspace(3) %gep)
+ %val = call <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3) %gep)
store <2 x i32> %val, ptr addrspace(1) %use
+ call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) %gep, i32 16, i32 16, i32 16, i32 0, i32 11)
+ call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) %gep, i32 4, i32 8, i32 0, i32 0, i32 0, i32 0)
ret void
}
-;.
; Check Function Attribute on decl
; OPT: declare <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3) captures(none)) #[[ATTR0:[0-9]+]]
+; OPT: declare void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32>, ptr addrspace(3) writeonly captures(none), i32 immarg, i32, i32, i32 immarg, i32 immarg) #[[ATTR1:[0-9]+]]
+; OPT: declare void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32>, ptr addrspace(3) writeonly captures(none), i32 immarg, i32, i32, i32, i32 immarg, i32 immarg) #[[ATTR1]]
declare <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3))
+declare void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32>, ptr addrspace(3), i32, i32, i32, i32, i32)
+declare void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32>, ptr addrspace(3), i32, i32, i32, i32, i32, i32)
+;.
; OPT: attributes #[[ATTR0]] = { convergent nocallback nofree nounwind willreturn memory(argmem: read) }
-; OPT: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+; OPT: attributes #[[ATTR1]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+; OPT: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
;.
; OPT: [[META0]] = !{[[META1:![0-9]+]]}
; OPT: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]], !"callee: %addr"}
More information about the llvm-commits
mailing list