[llvm] [Attributor] Take the address space from addrspacecast directly (PR #108258)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 12 20:03:17 PDT 2024
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/108258
>From d74911628f69b0ac0d88d8f138302b9e9f02284d Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Wed, 11 Sep 2024 12:23:32 -0400
Subject: [PATCH] [Attributor] Take the address space from addrspacecast
directly
If the value to be analyzed comes directly from an addrspacecast, we take the
source address space directly. This is to improve the case where, in
`AMDGPUPromoteKernelArgumentsPass`, the kernel argument is promoted by
inserting an addrspacecast directly from a generic pointer. However, during the
analysis, the underlying object will be the generic pointer instead of the
addrspacecast, so the inferred address space is the generic one, which is not
ideal.
---
.../Transforms/IPO/AttributorAttributes.cpp | 73 ++++++++++++++++---
llvm/test/CodeGen/AMDGPU/aa-as-infer.ll | 35 +++++++++
llvm/test/CodeGen/AMDGPU/addrspacecast.ll | 15 +++-
3 files changed, 108 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 217c7cccb5775a..7c8dc61b77c274 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12571,8 +12571,35 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
void initialize(Attributor &A) override {
assert(getAssociatedType()->isPtrOrPtrVectorTy() &&
"Associated value is not a pointer");
- if (getAssociatedType()->getPointerAddressSpace())
+ // If the pointer already has non-flat address space, we assume it is the
+ // correct one.
+ if (getAssociatedType()->getPointerAddressSpace()) {
+ [[maybe_unused]] bool R =
+ takeAddressSpace(getAssociatedType()->getPointerAddressSpace());
+ assert(R && "the take should happen");
indicateOptimisticFixpoint();
+ return;
+ }
+ // If the pointer is an addrspacecast, it has to be from a non-flat to flat.
+ // We assume the source address space is the correct one.
+ Value *V = &getAssociatedValue();
+ if (auto *ASCI = dyn_cast<AddrSpaceCastInst>(V)) {
+ assert(ASCI->getDestAddressSpace() == 0 &&
+ "The destination address space should be a flat address space");
+ [[maybe_unused]] bool R = takeAddressSpace(ASCI->getSrcAddressSpace());
+ assert(R && "the take should happen");
+ indicateOptimisticFixpoint();
+ return;
+ }
+ if (auto *C = dyn_cast<ConstantExpr>(V)) {
+ if (C->getOpcode() == Instruction::AddrSpaceCast) {
+ [[maybe_unused]] bool R = takeAddressSpace(
+ C->getOperand(0)->getType()->getPointerAddressSpace());
+ assert(R && "the take should happen");
+ indicateOptimisticFixpoint();
+ return;
+ }
+ }
}
ChangeStatus updateImpl(Attributor &A) override {
@@ -12582,6 +12609,23 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
auto Pred = [&](Value &Obj) {
if (isa<UndefValue>(&Obj))
return true;
+ // If an argument in generic address space has addrspace cast uses, and
+ // those casts are same, then we take the dst addrspace.
+ if (auto *Arg = dyn_cast<Argument>(&Obj)) {
+ if (Arg->getType()->getPointerAddressSpace() == 0) {
+ unsigned CastAddrSpace = 0;
+ for (auto *U : Arg->users()) {
+ auto *ASCI = dyn_cast<AddrSpaceCastInst>(U);
+ if (!ASCI)
+ continue;
+ if (CastAddrSpace && CastAddrSpace != ASCI->getDestAddressSpace())
+ return false;
+ CastAddrSpace = ASCI->getDestAddressSpace();
+ }
+ if (CastAddrSpace)
+ return takeAddressSpace(CastAddrSpace);
+ }
+ }
return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
};
@@ -12594,16 +12638,18 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
- Value *AssociatedValue = &getAssociatedValue();
- Value *OriginalValue = peelAddrspacecast(AssociatedValue);
- if (getAddressSpace() == NoAddressSpace ||
- getAddressSpace() == getAssociatedType()->getPointerAddressSpace())
+ unsigned NewAS = getAddressSpace();
+
+ if (NewAS == NoAddressSpace ||
+ NewAS == getAssociatedType()->getPointerAddressSpace())
return ChangeStatus::UNCHANGED;
+ Value *AssociatedValue = &getAssociatedValue();
+ Value *OriginalValue = peelAddrspacecast(AssociatedValue);
PointerType *NewPtrTy =
- PointerType::get(getAssociatedType()->getContext(), getAddressSpace());
+ PointerType::get(getAssociatedType()->getContext(), NewAS);
bool UseOriginalValue =
- OriginalValue->getType()->getPointerAddressSpace() == getAddressSpace();
+ OriginalValue->getType()->getPointerAddressSpace() == NewAS;
bool Changed = false;
@@ -12664,11 +12710,16 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
}
static Value *peelAddrspacecast(Value *V) {
- if (auto *I = dyn_cast<AddrSpaceCastInst>(V))
- return peelAddrspacecast(I->getPointerOperand());
+ if (auto *I = dyn_cast<AddrSpaceCastInst>(V)) {
+ assert(I->getSrcAddressSpace() && "there should not be AS 0 -> AS X");
+ return I->getPointerOperand();
+ }
if (auto *C = dyn_cast<ConstantExpr>(V))
- if (C->getOpcode() == Instruction::AddrSpaceCast)
- return peelAddrspacecast(C->getOperand(0));
+ if (C->getOpcode() == Instruction::AddrSpaceCast) {
+ assert(C->getOperand(0)->getType()->getPointerAddressSpace() &&
+ "there should not be AS 0 -> AS X");
+ return C->getOperand(0);
+ }
return V;
}
};
diff --git a/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll b/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
index fdc5debb18915c..d1a6414fe49ae1 100644
--- a/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
+++ b/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
@@ -243,3 +243,38 @@ define void @foo(ptr addrspace(3) %val) {
ret void
}
+define void @kernel_argument_promotion_pattern_intra_procedure(ptr %p, i32 %val) {
+; CHECK-LABEL: define void @kernel_argument_promotion_pattern_intra_procedure(
+; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[P_CAST_0:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
+; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[P_CAST_0]], align 4
+; CHECK-NEXT: ret void
+;
+ %p.cast.0 = addrspacecast ptr %p to ptr addrspace(1)
+ %p.cast.1 = addrspacecast ptr addrspace(1) %p.cast.0 to ptr
+ store i32 %val, ptr %p.cast.1
+ ret void
+}
+
+define internal void @use_argument_after_promotion(ptr %p, i32 %val) {
+; CHECK-LABEL: define internal void @use_argument_after_promotion(
+; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
+; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[TMP1]], align 4
+; CHECK-NEXT: ret void
+;
+ store i32 %val, ptr %p
+ ret void
+}
+
+define void @kernel_argument_promotion_pattern_inter_procedure(ptr %p, i32 %val) {
+; CHECK-LABEL: define void @kernel_argument_promotion_pattern_inter_procedure(
+; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @use_argument_after_promotion(ptr [[P]], i32 [[VAL]])
+; CHECK-NEXT: ret void
+;
+ %p.cast.0 = addrspacecast ptr %p to ptr addrspace(1)
+ %p.cast.1 = addrspacecast ptr addrspace(1) %p.cast.0 to ptr
+ call void @use_argument_after_promotion(ptr %p.cast.1, i32 %val)
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
index 7336543b41cbc8..3deb882354a3bd 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
@@ -215,11 +215,14 @@ define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) #0 {
}
; HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast:
+; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
+; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
+
+; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
-; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
-; HSA: flat_store_dword v[[[LO]]:[[HI]]], v[[K]]
+; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 {
%cast = addrspacecast ptr addrspace(3) null to ptr
store volatile i32 7, ptr %cast
@@ -259,10 +262,14 @@ define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 {
; FIXME: Shouldn't need to enable queue ptr
; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast:
+; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11
+; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
+
+; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_private_base
+
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
-; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
-; HSA: flat_store_dword v[[[LO]]:[[HI]]], v[[K]]
+; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
%cast = addrspacecast ptr addrspace(5) null to ptr
store volatile i32 7, ptr %cast
More information about the llvm-commits
mailing list