[llvm] d037b23 - [Attributor] Teach AAMemoryLocation about constant GPU memory
Johannes Doerfert via llvm-commits
llvm-commits at lists.llvm.org
Thu May 18 13:28:13 PDT 2023
Author: Johannes Doerfert
Date: 2023-05-18T13:27:43-07:00
New Revision: d037b237de6a4642cc0e01c0d6a5805ae52cb944
URL: https://github.com/llvm/llvm-project/commit/d037b237de6a4642cc0e01c0d6a5805ae52cb944
DIFF: https://github.com/llvm/llvm-project/commit/d037b237de6a4642cc0e01c0d6a5805ae52cb944.diff
LOG: [Attributor] Teach AAMemoryLocation about constant GPU memory
When targeting GPUs, AS(4) is constant memory. Accesses to constant memory
are (historically) not treated as "memory accesses", hence we should deduce
`memory(none)` for them.
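For example, with the `amdgcn-amd-amdhsa` triple, a load from an AS(4)
global no longer blocks the `memory(none)` deduction (a minimal sketch
mirroring the added test below; `@read_const` is a hypothetical name):

  @G = external dso_local addrspace(4) global i32, align 4

  ; Deduced: nofree norecurse nosync nounwind willreturn memory(none)
  define i32 @read_const() {
    %v = load i32, ptr addrspace(4) @G
    ret i32 %v
  }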
Added:
llvm/test/Transforms/Attributor/memory_locations_gpu.ll
Modified:
llvm/include/llvm/Transforms/IPO/Attributor.h
llvm/lib/Transforms/IPO/Attributor.cpp
llvm/lib/Transforms/IPO/AttributorAttributes.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 482e0868e7e7a..526929454e03e 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -164,6 +164,9 @@ enum class GPUAddressSpace : unsigned {
Local = 5,
};
+/// Return true iff \p M targets a GPU (and we can use GPU AS reasoning).
+bool isGPU(const Module &M);
+
/// Flags to distinguish intra-procedural queries from *potentially*
/// inter-procedural queries. Note that information can be valid for both and
/// therefore both bits might be set.
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 4bb1aad47c0ff..f6e08a9216dec 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -186,6 +186,11 @@ ChangeStatus &llvm::operator&=(ChangeStatus &L, ChangeStatus R) {
}
///}
+bool AA::isGPU(const Module &M) {
+ Triple T(M.getTargetTriple());
+ return T.isAMDGPU() || T.isNVPTX();
+}
+
bool AA::isNoSyncInst(Attributor &A, const Instruction &I,
const AbstractAttribute &QueryingAA) {
// We are looking for volatile instructions or non-relaxed atomics.
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 1c7441a5ae851..0b6ea681655a1 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -1124,8 +1124,7 @@ struct AAPointerInfoImpl
// outlive a GPU kernel. This is true for shared, constant, and local
// globals on AMD and NVIDIA GPUs.
auto HasKernelLifetime = [&](Value *V, Module &M) {
- Triple T(M.getTargetTriple());
- if (!(T.isAMDGPU() || T.isNVPTX()))
+ if (!AA::isGPU(M))
return false;
switch (AA::GPUAddressSpace(V->getType()->getPointerAddressSpace())) {
case AA::GPUAddressSpace::Shared:
@@ -8662,7 +8661,8 @@ struct AAMemoryLocationImpl : public AAMemoryLocation {
/// Determine the underlying location kinds for \p Ptr, e.g., globals or
/// arguments, and update the state and access map accordingly.
void categorizePtrValue(Attributor &A, const Instruction &I, const Value &Ptr,
- AAMemoryLocation::StateType &State, bool &Changed);
+ AAMemoryLocation::StateType &State, bool &Changed,
+ unsigned AccessAS = 0);
/// Used to allocate access sets.
BumpPtrAllocator &Allocator;
@@ -8670,14 +8670,24 @@ struct AAMemoryLocationImpl : public AAMemoryLocation {
void AAMemoryLocationImpl::categorizePtrValue(
Attributor &A, const Instruction &I, const Value &Ptr,
- AAMemoryLocation::StateType &State, bool &Changed) {
+ AAMemoryLocation::StateType &State, bool &Changed, unsigned AccessAS) {
LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Categorize pointer locations for "
<< Ptr << " ["
<< getMemoryLocationsAsStr(State.getAssumed()) << "]\n");
auto Pred = [&](Value &Obj) {
+ unsigned ObjectAS = Obj.getType()->getPointerAddressSpace();
// TODO: recognize the TBAA used for constant accesses.
MemoryLocationsKind MLK = NO_LOCATIONS;
+
+ // Filter accesses to constant (GPU) memory if we have an AS at the access
+ // site or the object is known to actually have the associated AS.
+ if ((AccessAS == (unsigned)AA::GPUAddressSpace::Constant ||
+ (ObjectAS == (unsigned)AA::GPUAddressSpace::Constant &&
+ isIdentifiedObject(&Obj))) &&
+ AA::isGPU(*I.getModule()))
+ return true;
+
if (isa<UndefValue>(&Obj))
return true;
if (isa<Argument>(&Obj)) {
@@ -8701,8 +8711,8 @@ void AAMemoryLocationImpl::categorizePtrValue(
else
MLK = NO_GLOBAL_EXTERNAL_MEM;
} else if (isa<ConstantPointerNull>(&Obj) &&
- !NullPointerIsDefined(getAssociatedFunction(),
- Ptr.getType()->getPointerAddressSpace())) {
+ (!NullPointerIsDefined(getAssociatedFunction(), AccessAS) ||
+ !NullPointerIsDefined(getAssociatedFunction(), ObjectAS))) {
return true;
} else if (isa<AllocaInst>(&Obj)) {
MLK = NO_LOCAL_MEM;
@@ -8840,7 +8850,8 @@ AAMemoryLocationImpl::categorizeAccessedLocations(Attributor &A, Instruction &I,
LLVM_DEBUG(
dbgs() << "[AAMemoryLocation] Categorize memory access with pointer: "
<< I << " [" << *Ptr << "]\n");
- categorizePtrValue(A, I, *Ptr, AccessedLocs, Changed);
+ categorizePtrValue(A, I, *Ptr, AccessedLocs, Changed,
+ Ptr->getType()->getPointerAddressSpace());
return AccessedLocs.getAssumed();
}
diff --git a/llvm/test/Transforms/Attributor/memory_locations_gpu.ll b/llvm/test/Transforms/Attributor/memory_locations_gpu.ll
new file mode 100644
index 0000000000000..7637991ff07a7
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/memory_locations_gpu.ll
@@ -0,0 +1,107 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "amdgcn-amd-amdhsa"
+
+@G = external dso_local addrspace(4) global i32, align 4
+
+declare ptr @ptr() memory(none)
+declare ptr addrspace(4) @ptr_to_const() memory(none)
+declare ptr addrspace(3) @ptr_to_shared() memory(none)
+
+;.
+; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external dso_local addrspace(4) global i32, align 4
+;.
+; Should be memory(none)
+define i32 @test_const_as_global1() {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define {{[^@]+}}@test_const_as_global1
+; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr addrspace(4) @G, align 4
+; CHECK-NEXT: ret i32 [[L1]]
+;
+ %l1 = load i32, ptr addrspace(4) @G
+ ret i32 %l1
+}
+; Should be memory(none)
+define i32 @test_const_as_global2() {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define {{[^@]+}}@test_const_as_global2
+; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-NEXT: [[L2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(4) @G to ptr), align 4
+; CHECK-NEXT: ret i32 [[L2]]
+;
+ %l2 = load i32, ptr addrspacecast (ptr addrspace(4) @G to ptr)
+ ret i32 %l2
+}
+; Should be memory(none)
+define i32 @test_const_as_call1() {
+; CHECK: Function Attrs: nosync memory(read)
+; CHECK-LABEL: define {{[^@]+}}@test_const_as_call1
+; CHECK-SAME: () #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT: [[P1:%.*]] = call ptr addrspace(4) @ptr_to_const()
+; CHECK-NEXT: [[C1:%.*]] = addrspacecast ptr addrspace(4) [[P1]] to ptr
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[C1]], align 4
+; CHECK-NEXT: ret i32 [[L1]]
+;
+ %p1 = call ptr addrspace(4) @ptr_to_const()
+ %c1 = addrspacecast ptr addrspace(4) %p1 to ptr
+ %l1 = load i32, ptr %c1
+ ret i32 %l1
+}
+; Should be memory(none)
+define i32 @test_const_as_call2() {
+; CHECK: Function Attrs: nosync memory(none)
+; CHECK-LABEL: define {{[^@]+}}@test_const_as_call2
+; CHECK-SAME: () #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT: [[P2:%.*]] = call ptr @ptr()
+; CHECK-NEXT: [[C2:%.*]] = addrspacecast ptr [[P2]] to ptr addrspace(4)
+; CHECK-NEXT: [[L2:%.*]] = load i32, ptr addrspace(4) [[C2]], align 4
+; CHECK-NEXT: ret i32 [[L2]]
+;
+ %p2 = call ptr @ptr()
+ %c2 = addrspacecast ptr %p2 to ptr addrspace(4)
+ %l2 = load i32, ptr addrspace(4) %c2
+ ret i32 %l2
+}
+
+; Should be memory(read)
+define i32 @test_shared_as_call1() {
+; CHECK: Function Attrs: nosync memory(read)
+; CHECK-LABEL: define {{[^@]+}}@test_shared_as_call1
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT: [[P1:%.*]] = call ptr addrspace(3) @ptr_to_shared()
+; CHECK-NEXT: [[C1:%.*]] = addrspacecast ptr addrspace(3) [[P1]] to ptr
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[C1]], align 4
+; CHECK-NEXT: ret i32 [[L1]]
+;
+ %p1 = call ptr addrspace(3) @ptr_to_shared()
+ %c1 = addrspacecast ptr addrspace(3) %p1 to ptr
+ %l1 = load i32, ptr %c1
+ ret i32 %l1
+}
+; Should be memory(read)
+define i32 @test_shared_as_call2() {
+; CHECK: Function Attrs: nosync memory(read)
+; CHECK-LABEL: define {{[^@]+}}@test_shared_as_call2
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT: [[P2:%.*]] = call ptr @ptr()
+; CHECK-NEXT: [[C2:%.*]] = addrspacecast ptr [[P2]] to ptr addrspace(3)
+; CHECK-NEXT: [[L2:%.*]] = load i32, ptr addrspace(3) [[C2]], align 4
+; CHECK-NEXT: ret i32 [[L2]]
+;
+ %p2 = call ptr @ptr()
+ %c2 = addrspacecast ptr %p2 to ptr addrspace(3)
+ %l2 = load i32, ptr addrspace(3) %c2
+ ret i32 %l2
+}
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { memory(none) }
+; CHECK: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR2]] = { nosync memory(read) }
+; CHECK: attributes #[[ATTR3]] = { nosync memory(none) }
+;.
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CGSCC: {{.*}}
+; TUNIT: {{.*}}