[llvm] [AMDGPU] Make ds/global load intrinsics IntrArgMemOnly (PR #152792)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 9 18:43:50 PDT 2025
https://github.com/choikwa updated https://github.com/llvm/llvm-project/pull/152792
>From e16ee1e521bff7c45c67abcbeeca1eaa25e2d8f6 Mon Sep 17 00:00:00 2001
From: Kevin Choi <kevin.choi at amd.com>
Date: Fri, 8 Aug 2025 15:26:15 -0500
Subject: [PATCH 1/2] [AMDGPU] Make ds/global load intrinsics IntrArgMemOnly
Together with IntrReadMem, this means that the intrinsic only reads memory through its pointer argument and pointers derived from it.
This allows passes such as the inliner to attach !alias.scope metadata to the call instruction, because they can see that no other memory is accessed.
---
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 90cfd8cedd51b..be6df257f668b 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -3087,7 +3087,7 @@ class AMDGPULoadIntrinsic<LLVMType ptr_ty>:
Intrinsic<
[llvm_any_ty],
[ptr_ty],
- [IntrReadMem, IntrWillReturn, IntrConvergent, NoCapture<ArgIndex<0>>, IntrNoCallback, IntrNoFree],
+ [IntrReadMem, IntrArgMemOnly, IntrWillReturn, IntrConvergent, NoCapture<ArgIndex<0>>, IntrNoCallback, IntrNoFree],
"",
[SDNPMemOperand]
>;
>From 7e28dd4799700bb0fff198be32aa0e85a64b8301 Mon Sep 17 00:00:00 2001
From: Kevin Choi <kevin.choi at amd.com>
Date: Sat, 9 Aug 2025 20:41:35 -0500
Subject: [PATCH 2/2] Add testcase that shows generation of !alias.scope on
ds.read intrinsic call and memory(argmem: read) function attribute
---
llvm/test/CodeGen/AMDGPU/load-intrinsics.ll | 43 +++++++++++++++++++++
1 file changed, 43 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/load-intrinsics.ll
diff --git a/llvm/test/CodeGen/AMDGPU/load-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/load-intrinsics.ll
new file mode 100644
index 0000000000000..305df3f3a9713
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/load-intrinsics.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=amdgcn --passes=inline --enable-noalias-to-md-conversion -S %s | FileCheck --check-prefix=OPT %s
+
+declare <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32.p3(ptr addrspace(3))
+
+define amdgpu_ps void @foo(ptr addrspace(3) %addr_f, ptr addrspace(1) %use_f) {
+; OPT-LABEL: define amdgpu_ps void @foo(
+; OPT-SAME: ptr addrspace(3) [[ADDR_F:%.*]], ptr addrspace(1) [[USE_F:%.*]]) {
+; OPT-NEXT: [[ENTRY:.*:]]
+; OPT-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]])
+; OPT-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
+; OPT-NEXT: [[GEP_I:%.*]] = getelementptr i64, ptr addrspace(3) [[ADDR_F]], i32 4
+; OPT-NEXT: [[VAL_I:%.*]] = call <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3) [[GEP_I]]), !alias.scope [[META0]], !noalias [[META3]]
+; OPT-NEXT: store <2 x i32> [[VAL_I]], ptr addrspace(1) [[USE_F]], align 8, !alias.scope [[META3]], !noalias [[META0]]
+; OPT-NEXT: ret void
+;
+entry:
+ call void @bar(ptr addrspace(3) %addr_f, ptr addrspace(1) %use_f)
+ ret void
+}
+
+define amdgpu_ps void @bar(ptr addrspace(3) noalias %addr, ptr addrspace(1) noalias %use) {
+; OPT-LABEL: define amdgpu_ps void @bar(
+; OPT-SAME: ptr addrspace(3) noalias [[ADDR:%.*]], ptr addrspace(1) noalias [[USE:%.*]]) {
+; OPT-NEXT: [[ENTRY:.*:]]
+; OPT-NEXT: [[GEP:%.*]] = getelementptr i64, ptr addrspace(3) [[ADDR]], i32 4
+; OPT-NEXT: [[VAL:%.*]] = call <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3) [[GEP]])
+; OPT-NEXT: store <2 x i32> [[VAL]], ptr addrspace(1) [[USE]], align 8
+; OPT-NEXT: ret void
+;
+entry:
+ %gep = getelementptr i64, ptr addrspace(3) %addr, i32 4
+ %val = call <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32.p3(ptr addrspace(3) %gep)
+ store <2 x i32> %val, ptr addrspace(1) %use
+ ret void
+}
+;.
+; OPT: [[META0]] = !{[[META1:![0-9]+]]}
+; OPT: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]], !"bar: %addr"}
+; OPT: [[META2]] = distinct !{[[META2]], !"bar"}
+; OPT: [[META3]] = !{[[META4:![0-9]+]]}
+; OPT: [[META4]] = distinct !{[[META4]], [[META2]], !"bar: %use"}
+;.
More information about the llvm-commits
mailing list