[llvm] InferAddressSpaces: Handle masked load and store intrinsics (PR #102007)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 5 13:14:26 PDT 2024


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/102007

From fd7b3c219a08f060e9b40dbe0017a33683bcf8b3 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 5 Aug 2024 19:49:31 +0400
Subject: [PATCH] InferAddressSpaces: Handle masked load and store intrinsics

---
 .../Transforms/Scalar/InferAddressSpaces.cpp  | 16 +++--
 .../AMDGPU/masked-load-store.ll               | 68 +++++++++++++++++++
 2 files changed, 77 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/Transforms/InferAddressSpaces/AMDGPU/masked-load-store.ll
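
In short, the pass now looks through an addrspacecast feeding the pointer
operand of llvm.masked.load and llvm.masked.store and rewrites the intrinsic
to use the original, more specific address space. A minimal sketch of the
effect, mirroring the tests below but with a hypothetical <4 x i32> payload:

  ; Before: masked load through a flat pointer derived from a global pointer.
  %cast = addrspacecast ptr addrspace(1) %ptr to ptr
  %v = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %cast, i32 8, <4 x i1> %mask, <4 x i32> zeroinitializer)

  ; After: the cast is bypassed and the intrinsic is remangled for addrspace(1).
  %v = call <4 x i32> @llvm.masked.load.v4i32.p1(ptr addrspace(1) %ptr, i32 8, <4 x i1> %mask, <4 x i32> zeroinitializer)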

diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 87b885447cc02..2ddf24be67702 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -369,13 +369,13 @@ bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
                                                       Value *OldV,
                                                       Value *NewV) const {
   Module *M = II->getParent()->getParent()->getParent();
-
-  switch (II->getIntrinsicID()) {
-  case Intrinsic::objectsize: {
+  Intrinsic::ID IID = II->getIntrinsicID();
+  switch (IID) {
+  case Intrinsic::objectsize:
+  case Intrinsic::masked_load: {
     Type *DestTy = II->getType();
     Type *SrcTy = NewV->getType();
-    Function *NewDecl =
-        Intrinsic::getDeclaration(M, II->getIntrinsicID(), {DestTy, SrcTy});
+    Function *NewDecl = Intrinsic::getDeclaration(M, IID, {DestTy, SrcTy});
     II->setArgOperand(0, NewV);
     II->setCalledFunction(NewDecl);
     return true;
@@ -386,12 +386,12 @@ bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
   case Intrinsic::masked_gather: {
     Type *RetTy = II->getType();
     Type *NewPtrTy = NewV->getType();
-    Function *NewDecl =
-        Intrinsic::getDeclaration(M, II->getIntrinsicID(), {RetTy, NewPtrTy});
+    Function *NewDecl = Intrinsic::getDeclaration(M, IID, {RetTy, NewPtrTy});
     II->setArgOperand(0, NewV);
     II->setCalledFunction(NewDecl);
     return true;
   }
+  case Intrinsic::masked_store:
   case Intrinsic::masked_scatter: {
     Type *ValueTy = II->getOperand(0)->getType();
     Type *NewPtrTy = NewV->getType();
@@ -429,11 +429,13 @@ void InferAddressSpacesImpl::collectRewritableIntrinsicOperands(
     appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
                                                  PostorderStack, Visited);
     break;
+  case Intrinsic::masked_load:
   case Intrinsic::masked_gather:
   case Intrinsic::prefetch:
     appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
                                                  PostorderStack, Visited);
     break;
+  case Intrinsic::masked_store:
   case Intrinsic::masked_scatter:
     appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(1),
                                                  PostorderStack, Visited);
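
For reference, the pointer operand sits at index 0 for llvm.masked.load (like
masked_gather) and at index 1 for llvm.masked.store (like masked_scatter),
which is why the two new cases feed different argument operands to
appendsFlatAddressExpressionToPostorderStack. The generic signatures,
specialized here to a hypothetical <8 x i32> payload in the flat address
space:

  declare <8 x i32> @llvm.masked.load.v8i32.p0(ptr, i32 immarg, <8 x i1>, <8 x i32>)
  declare void @llvm.masked.store.v8i32.p0(<8 x i32>, ptr, i32 immarg, <8 x i1>)
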
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/masked-load-store.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/masked-load-store.ll
new file mode 100644
index 0000000000000..e14dfd055cbe8
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/masked-load-store.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s
+
+define <32 x i32> @masked_load_v32i32_global_to_flat(ptr addrspace(1) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define <32 x i32> @masked_load_v32i32_global_to_flat(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT:    [[LOAD:%.*]] = call <32 x i32> @llvm.masked.load.v32i32.p1(ptr addrspace(1) [[PTR]], i32 8, <32 x i1> [[MASK]], <32 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <32 x i32> [[LOAD]]
+;
+  %cast = addrspacecast ptr addrspace(1) %ptr to ptr
+  %load = call <32 x i32> @llvm.masked.load.v32i32.p0(ptr %cast, i32 8, <32 x i1> %mask, <32 x i32> zeroinitializer)
+  ret <32 x i32> %load
+}
+define <32 x i32> @masked_load_v32i32_local_to_flat(ptr addrspace(3) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define <32 x i32> @masked_load_v32i32_local_to_flat(
+; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT:    [[LOAD:%.*]] = call <32 x i32> @llvm.masked.load.v32i32.p3(ptr addrspace(3) [[PTR]], i32 8, <32 x i1> [[MASK]], <32 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <32 x i32> [[LOAD]]
+;
+  %cast = addrspacecast ptr addrspace(3) %ptr to ptr
+  %load = call <32 x i32> @llvm.masked.load.v32i32.p0(ptr %cast, i32 8, <32 x i1> %mask, <32 x i32> zeroinitializer)
+  ret <32 x i32> %load
+}
+
+define <32 x i32> @masked_load_v32i32_private_to_flat(ptr addrspace(5) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define <32 x i32> @masked_load_v32i32_private_to_flat(
+; CHECK-SAME: ptr addrspace(5) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT:    [[LOAD:%.*]] = call <32 x i32> @llvm.masked.load.v32i32.p5(ptr addrspace(5) [[PTR]], i32 8, <32 x i1> [[MASK]], <32 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <32 x i32> [[LOAD]]
+;
+  %cast = addrspacecast ptr addrspace(5) %ptr to ptr
+  %load = call <32 x i32> @llvm.masked.load.v32i32.p0(ptr %cast, i32 8, <32 x i1> %mask, <32 x i32> zeroinitializer)
+  ret <32 x i32> %load
+}
+
+define void @masked_store_v32i32_global_to_flat(ptr addrspace(1) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define void @masked_store_v32i32_global_to_flat(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT:    tail call void @llvm.masked.store.v32i32.p1(<32 x i32> zeroinitializer, ptr addrspace(1) [[PTR]], i32 128, <32 x i1> [[MASK]])
+; CHECK-NEXT:    ret void
+;
+  %cast = addrspacecast ptr addrspace(1) %ptr to ptr
+  tail call void @llvm.masked.store.v32i32.p0(<32 x i32> zeroinitializer, ptr %cast, i32 128, <32 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_v32i32_local_to_flat(ptr addrspace(3) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define void @masked_store_v32i32_local_to_flat(
+; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT:    tail call void @llvm.masked.store.v32i32.p3(<32 x i32> zeroinitializer, ptr addrspace(3) [[PTR]], i32 128, <32 x i1> [[MASK]])
+; CHECK-NEXT:    ret void
+;
+  %cast = addrspacecast ptr addrspace(3) %ptr to ptr
+  tail call void @llvm.masked.store.v32i32.p0(<32 x i32> zeroinitializer, ptr %cast, i32 128, <32 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_v32i32_private_to_flat(ptr addrspace(5) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define void @masked_store_v32i32_private_to_flat(
+; CHECK-SAME: ptr addrspace(5) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT:    tail call void @llvm.masked.store.v32i32.p5(<32 x i32> zeroinitializer, ptr addrspace(5) [[PTR]], i32 128, <32 x i1> [[MASK]])
+; CHECK-NEXT:    ret void
+;
+  %cast = addrspacecast ptr addrspace(5) %ptr to ptr
+  tail call void @llvm.masked.store.v32i32.p0(<32 x i32> zeroinitializer, ptr %cast, i32 128, <32 x i1> %mask)
+  ret void
+}
+
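
Note how the overload mangling in the CHECK lines follows from the
{DestTy, SrcTy} (load) and {ValueTy, NewPtrTy} (store) type lists passed to
Intrinsic::getDeclaration above: after rewriting, the module contains
address-space-specific declarations along these lines (reconstructed from the
CHECK lines; attribute lists omitted):

  declare <32 x i32> @llvm.masked.load.v32i32.p1(ptr addrspace(1), i32 immarg, <32 x i1>, <32 x i32>)
  declare void @llvm.masked.store.v32i32.p1(<32 x i32>, ptr addrspace(1), i32 immarg, <32 x i1>)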


