[llvm-branch-commits] [llvm] InferAddressSpaces: Handle masked load and store intrinsics (PR #102007)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Aug 5 08:57:36 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/102007.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp (+9-7)
- (added) llvm/test/Transforms/InferAddressSpaces/AMDGPU/masked-load-store.ll (+68)
``````````diff
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 87b885447cc02..2ddf24be67702 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -369,13 +369,13 @@ bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
Value *OldV,
Value *NewV) const {
Module *M = II->getParent()->getParent()->getParent();
-
- switch (II->getIntrinsicID()) {
- case Intrinsic::objectsize: {
+ Intrinsic::ID IID = II->getIntrinsicID();
+ switch (IID) {
+ case Intrinsic::objectsize:
+ case Intrinsic::masked_load: {
Type *DestTy = II->getType();
Type *SrcTy = NewV->getType();
- Function *NewDecl =
- Intrinsic::getDeclaration(M, II->getIntrinsicID(), {DestTy, SrcTy});
+ Function *NewDecl = Intrinsic::getDeclaration(M, IID, {DestTy, SrcTy});
II->setArgOperand(0, NewV);
II->setCalledFunction(NewDecl);
return true;
@@ -386,12 +386,12 @@ bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
case Intrinsic::masked_gather: {
Type *RetTy = II->getType();
Type *NewPtrTy = NewV->getType();
- Function *NewDecl =
- Intrinsic::getDeclaration(M, II->getIntrinsicID(), {RetTy, NewPtrTy});
+ Function *NewDecl = Intrinsic::getDeclaration(M, IID, {RetTy, NewPtrTy});
II->setArgOperand(0, NewV);
II->setCalledFunction(NewDecl);
return true;
}
+ case Intrinsic::masked_store:
case Intrinsic::masked_scatter: {
Type *ValueTy = II->getOperand(0)->getType();
Type *NewPtrTy = NewV->getType();
@@ -429,11 +429,13 @@ void InferAddressSpacesImpl::collectRewritableIntrinsicOperands(
appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
PostorderStack, Visited);
break;
+ case Intrinsic::masked_load:
case Intrinsic::masked_gather:
case Intrinsic::prefetch:
appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
PostorderStack, Visited);
break;
+ case Intrinsic::masked_store:
case Intrinsic::masked_scatter:
appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(1),
PostorderStack, Visited);
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/masked-load-store.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/masked-load-store.ll
new file mode 100644
index 0000000000000..e14dfd055cbe8
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/masked-load-store.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s
+
+define <32 x i32> @masked_load_v32i32_global_to_flat(ptr addrspace(1) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define <32 x i32> @masked_load_v32i32_global_to_flat(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT: [[LOAD:%.*]] = call <32 x i32> @llvm.masked.load.v32i32.p1(ptr addrspace(1) [[PTR]], i32 8, <32 x i1> [[MASK]], <32 x i32> zeroinitializer)
+; CHECK-NEXT: ret <32 x i32> [[LOAD]]
+;
+ %cast = addrspacecast ptr addrspace(1) %ptr to ptr
+ %load = call <32 x i32> @llvm.masked.load.v32i32.p0(ptr %cast, i32 8, <32 x i1> %mask, <32 x i32> zeroinitializer)
+ ret <32 x i32> %load
+}
+define <32 x i32> @masked_load_v32i32_local_to_flat(ptr addrspace(3) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define <32 x i32> @masked_load_v32i32_local_to_flat(
+; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT: [[LOAD:%.*]] = call <32 x i32> @llvm.masked.load.v32i32.p3(ptr addrspace(3) [[PTR]], i32 8, <32 x i1> [[MASK]], <32 x i32> zeroinitializer)
+; CHECK-NEXT: ret <32 x i32> [[LOAD]]
+;
+ %cast = addrspacecast ptr addrspace(3) %ptr to ptr
+ %load = call <32 x i32> @llvm.masked.load.v32i32.p0(ptr %cast, i32 8, <32 x i1> %mask, <32 x i32> zeroinitializer)
+ ret <32 x i32> %load
+}
+
+define <32 x i32> @masked_load_v32i32_private_to_flat(ptr addrspace(5) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define <32 x i32> @masked_load_v32i32_private_to_flat(
+; CHECK-SAME: ptr addrspace(5) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT: [[LOAD:%.*]] = call <32 x i32> @llvm.masked.load.v32i32.p5(ptr addrspace(5) [[PTR]], i32 8, <32 x i1> [[MASK]], <32 x i32> zeroinitializer)
+; CHECK-NEXT: ret <32 x i32> [[LOAD]]
+;
+ %cast = addrspacecast ptr addrspace(5) %ptr to ptr
+ %load = call <32 x i32> @llvm.masked.load.v32i32.p0(ptr %cast, i32 8, <32 x i1> %mask, <32 x i32> zeroinitializer)
+ ret <32 x i32> %load
+}
+
+define void @masked_store_v32i32_global_to_flat(ptr addrspace(1) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define void @masked_store_v32i32_global_to_flat(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT: tail call void @llvm.masked.store.v32i32.p1(<32 x i32> zeroinitializer, ptr addrspace(1) [[PTR]], i32 128, <32 x i1> [[MASK]])
+; CHECK-NEXT: ret void
+;
+ %cast = addrspacecast ptr addrspace(1) %ptr to ptr
+ tail call void @llvm.masked.store.v32i32.p0(<32 x i32> zeroinitializer, ptr %cast, i32 128, <32 x i1> %mask)
+ ret void
+}
+
+define void @masked_store_v32i32_local_to_flat(ptr addrspace(3) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define void @masked_store_v32i32_local_to_flat(
+; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT: tail call void @llvm.masked.store.v32i32.p3(<32 x i32> zeroinitializer, ptr addrspace(3) [[PTR]], i32 128, <32 x i1> [[MASK]])
+; CHECK-NEXT: ret void
+;
+ %cast = addrspacecast ptr addrspace(3) %ptr to ptr
+ tail call void @llvm.masked.store.v32i32.p0(<32 x i32> zeroinitializer, ptr %cast, i32 128, <32 x i1> %mask)
+ ret void
+}
+
+define void @masked_store_v32i32_private_to_flat(ptr addrspace(5) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define void @masked_store_v32i32_private_to_flat(
+; CHECK-SAME: ptr addrspace(5) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT: tail call void @llvm.masked.store.v32i32.p5(<32 x i32> zeroinitializer, ptr addrspace(5) [[PTR]], i32 128, <32 x i1> [[MASK]])
+; CHECK-NEXT: ret void
+;
+ %cast = addrspacecast ptr addrspace(5) %ptr to ptr
+ tail call void @llvm.masked.store.v32i32.p0(<32 x i32> zeroinitializer, ptr %cast, i32 128, <32 x i1> %mask)
+ ret void
+}
+
``````````
</details>
https://github.com/llvm/llvm-project/pull/102007
More information about the llvm-branch-commits
mailing list