[llvm] InferAddressSpaces: Handle masked load and store intrinsics (PR #102007)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 5 13:14:26 PDT 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/102007
From fd7b3c219a08f060e9b40dbe0017a33683bcf8b3 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 5 Aug 2024 19:49:31 +0400
Subject: [PATCH] InferAddressSpaces: Handle masked load and store intrinsics
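Rewrite the pointer operand of llvm.masked.load (operand 0) and
llvm.masked.store (operand 1) when a more specific address space can be
inferred through an addrspacecast, re-declaring the intrinsic for the new
pointer type. As an illustration, this is the global-to-flat case from the
new test below:

  %cast = addrspacecast ptr addrspace(1) %ptr to ptr
  %load = call <32 x i32> @llvm.masked.load.v32i32.p0(ptr %cast, i32 8, <32 x i1> %mask, <32 x i32> zeroinitializer)

is rewritten to

  %load = call <32 x i32> @llvm.masked.load.v32i32.p1(ptr addrspace(1) %ptr, i32 8, <32 x i1> %mask, <32 x i32> zeroinitializer)

masked_load shares the objectsize handling since both intrinsics are
overloaded on the result and pointer types; masked_store joins
masked_scatter, which takes the pointer as its second operand and is
overloaded on the value and pointer types.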
---
.../Transforms/Scalar/InferAddressSpaces.cpp | 16 +++--
.../AMDGPU/masked-load-store.ll | 68 +++++++++++++++++++
2 files changed, 77 insertions(+), 7 deletions(-)
create mode 100644 llvm/test/Transforms/InferAddressSpaces/AMDGPU/masked-load-store.ll
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 87b885447cc02..2ddf24be67702 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -369,13 +369,13 @@ bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
Value *OldV,
Value *NewV) const {
Module *M = II->getParent()->getParent()->getParent();
-
- switch (II->getIntrinsicID()) {
- case Intrinsic::objectsize: {
+ Intrinsic::ID IID = II->getIntrinsicID();
+ switch (IID) {
+ case Intrinsic::objectsize:
+ case Intrinsic::masked_load: {
Type *DestTy = II->getType();
Type *SrcTy = NewV->getType();
- Function *NewDecl =
- Intrinsic::getDeclaration(M, II->getIntrinsicID(), {DestTy, SrcTy});
+ Function *NewDecl = Intrinsic::getDeclaration(M, IID, {DestTy, SrcTy});
II->setArgOperand(0, NewV);
II->setCalledFunction(NewDecl);
return true;
@@ -386,12 +386,12 @@ bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
case Intrinsic::masked_gather: {
Type *RetTy = II->getType();
Type *NewPtrTy = NewV->getType();
- Function *NewDecl =
- Intrinsic::getDeclaration(M, II->getIntrinsicID(), {RetTy, NewPtrTy});
+ Function *NewDecl = Intrinsic::getDeclaration(M, IID, {RetTy, NewPtrTy});
II->setArgOperand(0, NewV);
II->setCalledFunction(NewDecl);
return true;
}
+ case Intrinsic::masked_store:
case Intrinsic::masked_scatter: {
Type *ValueTy = II->getOperand(0)->getType();
Type *NewPtrTy = NewV->getType();
@@ -429,11 +429,13 @@ void InferAddressSpacesImpl::collectRewritableIntrinsicOperands(
appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
PostorderStack, Visited);
break;
+ case Intrinsic::masked_load:
case Intrinsic::masked_gather:
case Intrinsic::prefetch:
appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
PostorderStack, Visited);
break;
+ case Intrinsic::masked_store:
case Intrinsic::masked_scatter:
appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(1),
PostorderStack, Visited);
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/masked-load-store.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/masked-load-store.ll
new file mode 100644
index 0000000000000..e14dfd055cbe8
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/masked-load-store.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s
+
+define <32 x i32> @masked_load_v32i32_global_to_flat(ptr addrspace(1) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define <32 x i32> @masked_load_v32i32_global_to_flat(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT: [[LOAD:%.*]] = call <32 x i32> @llvm.masked.load.v32i32.p1(ptr addrspace(1) [[PTR]], i32 8, <32 x i1> [[MASK]], <32 x i32> zeroinitializer)
+; CHECK-NEXT: ret <32 x i32> [[LOAD]]
+;
+ %cast = addrspacecast ptr addrspace(1) %ptr to ptr
+ %load = call <32 x i32> @llvm.masked.load.v32i32.p0(ptr %cast, i32 8, <32 x i1> %mask, <32 x i32> zeroinitializer)
+ ret <32 x i32> %load
+}
+define <32 x i32> @masked_load_v32i32_local_to_flat(ptr addrspace(3) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define <32 x i32> @masked_load_v32i32_local_to_flat(
+; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT: [[LOAD:%.*]] = call <32 x i32> @llvm.masked.load.v32i32.p3(ptr addrspace(3) [[PTR]], i32 8, <32 x i1> [[MASK]], <32 x i32> zeroinitializer)
+; CHECK-NEXT: ret <32 x i32> [[LOAD]]
+;
+ %cast = addrspacecast ptr addrspace(3) %ptr to ptr
+ %load = call <32 x i32> @llvm.masked.load.v32i32.p0(ptr %cast, i32 8, <32 x i1> %mask, <32 x i32> zeroinitializer)
+ ret <32 x i32> %load
+}
+
+define <32 x i32> @masked_load_v32i32_private_to_flat(ptr addrspace(5) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define <32 x i32> @masked_load_v32i32_private_to_flat(
+; CHECK-SAME: ptr addrspace(5) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT: [[LOAD:%.*]] = call <32 x i32> @llvm.masked.load.v32i32.p5(ptr addrspace(5) [[PTR]], i32 8, <32 x i1> [[MASK]], <32 x i32> zeroinitializer)
+; CHECK-NEXT: ret <32 x i32> [[LOAD]]
+;
+ %cast = addrspacecast ptr addrspace(5) %ptr to ptr
+ %load = call <32 x i32> @llvm.masked.load.v32i32.p0(ptr %cast, i32 8, <32 x i1> %mask, <32 x i32> zeroinitializer)
+ ret <32 x i32> %load
+}
+
+define void @masked_store_v32i32_global_to_flat(ptr addrspace(1) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define void @masked_store_v32i32_global_to_flat(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT: tail call void @llvm.masked.store.v32i32.p1(<32 x i32> zeroinitializer, ptr addrspace(1) [[PTR]], i32 128, <32 x i1> [[MASK]])
+; CHECK-NEXT: ret void
+;
+ %cast = addrspacecast ptr addrspace(1) %ptr to ptr
+ tail call void @llvm.masked.store.v32i32.p0(<32 x i32> zeroinitializer, ptr %cast, i32 128, <32 x i1> %mask)
+ ret void
+}
+
+define void @masked_store_v32i32_local_to_flat(ptr addrspace(3) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define void @masked_store_v32i32_local_to_flat(
+; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT: tail call void @llvm.masked.store.v32i32.p3(<32 x i32> zeroinitializer, ptr addrspace(3) [[PTR]], i32 128, <32 x i1> [[MASK]])
+; CHECK-NEXT: ret void
+;
+ %cast = addrspacecast ptr addrspace(3) %ptr to ptr
+ tail call void @llvm.masked.store.v32i32.p0(<32 x i32> zeroinitializer, ptr %cast, i32 128, <32 x i1> %mask)
+ ret void
+}
+
+define void @masked_store_v32i32_private_to_flat(ptr addrspace(5) %ptr, <32 x i1> %mask) {
+; CHECK-LABEL: define void @masked_store_v32i32_private_to_flat(
+; CHECK-SAME: ptr addrspace(5) [[PTR:%.*]], <32 x i1> [[MASK:%.*]]) {
+; CHECK-NEXT: tail call void @llvm.masked.store.v32i32.p5(<32 x i32> zeroinitializer, ptr addrspace(5) [[PTR]], i32 128, <32 x i1> [[MASK]])
+; CHECK-NEXT: ret void
+;
+ %cast = addrspacecast ptr addrspace(5) %ptr to ptr
+ tail call void @llvm.masked.store.v32i32.p0(<32 x i32> zeroinitializer, ptr %cast, i32 128, <32 x i1> %mask)
+ ret void
+}
+