[llvm] 1d30f71 - [AMDGPU] Make ds/global load intrinsics IntrArgMemOnly (#152792)

via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 12 05:51:42 PDT 2025


Author: choikwa
Date: 2025-08-12T21:51:39+09:00
New Revision: 1d30f71b215b3adf6a51b7dc0b454dcadc4c4bc3

URL: https://github.com/llvm/llvm-project/commit/1d30f71b215b3adf6a51b7dc0b454dcadc4c4bc3
DIFF: https://github.com/llvm/llvm-project/commit/1d30f71b215b3adf6a51b7dc0b454dcadc4c4bc3.diff

LOG: [AMDGPU] Make ds/global load intrinsics IntrArgMemOnly (#152792)

Combined with IntrReadMem, this means that the intrinsic only reads memory
through the given pointer argument and pointers derived from it. This allows
passes such as the Inliner to attach !alias.scope metadata to the call
instruction, because they can see that no other memory is accessed.

Discovered via SWDEV-543741

---------

Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>

Added: 
    llvm/test/Transforms/Inline/AMDGPU/load-intrinsics.ll

Modified: 
    llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 90cfd8cedd51b..be6df257f668b 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -3087,7 +3087,7 @@ class AMDGPULoadIntrinsic<LLVMType ptr_ty>:
   Intrinsic<
     [llvm_any_ty],
     [ptr_ty],
-    [IntrReadMem, IntrWillReturn, IntrConvergent, NoCapture<ArgIndex<0>>, IntrNoCallback, IntrNoFree],
+    [IntrReadMem, IntrArgMemOnly, IntrWillReturn, IntrConvergent, NoCapture<ArgIndex<0>>, IntrNoCallback, IntrNoFree],
     "",
     [SDNPMemOperand]
   >;

diff  --git a/llvm/test/Transforms/Inline/AMDGPU/load-intrinsics.ll b/llvm/test/Transforms/Inline/AMDGPU/load-intrinsics.ll
new file mode 100644
index 0000000000000..46f53d8f82cfd
--- /dev/null
+++ b/llvm/test/Transforms/Inline/AMDGPU/load-intrinsics.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt -mtriple=amdgcn --passes=inline --enable-noalias-to-md-conversion -S %s | FileCheck --check-prefix=OPT %s
+
+; This test checks that the load intrinsic gets the correct memory(argmem: read) attribute and
+; that the call instruction is assigned the correct !alias.scope metadata after inlining.
+
+define void @caller(ptr addrspace(3) %addr_f, ptr addrspace(1) %use_f) {
+; OPT-LABEL: define void @caller(
+; OPT-SAME: ptr addrspace(3) [[ADDR_F:%.*]], ptr addrspace(1) [[USE_F:%.*]]) {
+; OPT-NEXT:  [[ENTRY:.*:]]
+; OPT-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]])
+; OPT-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
+; OPT-NEXT:    [[GEP_I:%.*]] = getelementptr i64, ptr addrspace(3) [[ADDR_F]], i32 4
+; OPT-NEXT:    [[VAL_I:%.*]] = call <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3) [[GEP_I]]), !alias.scope [[META0]], !noalias [[META3]]
+; OPT-NEXT:    store <2 x i32> [[VAL_I]], ptr addrspace(1) [[USE_F]], align 8, !alias.scope [[META3]], !noalias [[META0]]
+; OPT-NEXT:    ret void
+;
+entry:
+  call void @callee(ptr addrspace(3) %addr_f, ptr addrspace(1) %use_f)
+  ret void
+}
+
+define void @callee(ptr addrspace(3) noalias %addr, ptr addrspace(1) noalias %use) {
+; OPT-LABEL: define void @callee(
+; OPT-SAME: ptr addrspace(3) noalias [[ADDR:%.*]], ptr addrspace(1) noalias [[USE:%.*]]) {
+; OPT-NEXT:  [[ENTRY:.*:]]
+; OPT-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr addrspace(3) [[ADDR]], i32 4
+; OPT-NEXT:    [[VAL:%.*]] = call <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3) [[GEP]])
+; OPT-NEXT:    store <2 x i32> [[VAL]], ptr addrspace(1) [[USE]], align 8
+; OPT-NEXT:    ret void
+;
+entry:
+  %gep = getelementptr i64, ptr addrspace(3) %addr, i32 4
+  %val = call <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32.p3(ptr addrspace(3) %gep)
+  store <2 x i32> %val, ptr addrspace(1) %use
+  ret void
+}
+;.
+; Check Function Attribute on decl
+; OPT: declare <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3) captures(none)) #[[ATTR0:[0-9]+]]
+declare <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3))
+; OPT: attributes #[[ATTR0]] = { convergent nocallback nofree nounwind willreturn memory(argmem: read) }
+; OPT: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+;.
+; OPT: [[META0]] = !{[[META1:![0-9]+]]}
+; OPT: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]], !"callee: %addr"}
+; OPT: [[META2]] = distinct !{[[META2]], !"callee"}
+; OPT: [[META3]] = !{[[META4:![0-9]+]]}
+; OPT: [[META4]] = distinct !{[[META4]], [[META2]], !"callee: %use"}
+;.


        


More information about the llvm-commits mailing list