[llvm] d037b23 - [Attributor] Teach AAMemoryLocation about constant GPU memory

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Thu May 18 13:28:13 PDT 2023


Author: Johannes Doerfert
Date: 2023-05-18T13:27:43-07:00
New Revision: d037b237de6a4642cc0e01c0d6a5805ae52cb944

URL: https://github.com/llvm/llvm-project/commit/d037b237de6a4642cc0e01c0d6a5805ae52cb944
DIFF: https://github.com/llvm/llvm-project/commit/d037b237de6a4642cc0e01c0d6a5805ae52cb944.diff

LOG: [Attributor] Teach AAMemoryLocation about constant GPU memory

Address space 4 (AS(4)) is the constant address space when targeting GPUs.
Accesses to constant memory are (historically) not treated as "memory
accesses", hence we should deduce `memory(none)` for functions that only
access such memory.

Added: 
    llvm/test/Transforms/Attributor/memory_locations_gpu.ll

Modified: 
    llvm/include/llvm/Transforms/IPO/Attributor.h
    llvm/lib/Transforms/IPO/Attributor.cpp
    llvm/lib/Transforms/IPO/AttributorAttributes.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 482e0868e7e7a..526929454e03e 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -164,6 +164,9 @@ enum class GPUAddressSpace : unsigned {
   Local = 5,
 };
 
+/// Return true iff \p M targets a GPU (and we can use GPU AS reasoning).
+bool isGPU(const Module &M);
+
 /// Flags to distinguish intra-procedural queries from *potentially*
 /// inter-procedural queries. Not that information can be valid for both and
 /// therefore both bits might be set.

diff  --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 4bb1aad47c0ff..f6e08a9216dec 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -186,6 +186,11 @@ ChangeStatus &llvm::operator&=(ChangeStatus &L, ChangeStatus R) {
 }
 ///}
 
+bool AA::isGPU(const Module &M) {
+  Triple T(M.getTargetTriple());
+  return T.isAMDGPU() || T.isNVPTX();
+}
+
 bool AA::isNoSyncInst(Attributor &A, const Instruction &I,
                       const AbstractAttribute &QueryingAA) {
   // We are looking for volatile instructions or non-relaxed atomics.

diff  --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 1c7441a5ae851..0b6ea681655a1 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -1124,8 +1124,7 @@ struct AAPointerInfoImpl
     // outlive a GPU kernel. This is true for shared, constant, and local
     // globals on AMD and NVIDIA GPUs.
     auto HasKernelLifetime = [&](Value *V, Module &M) {
-      Triple T(M.getTargetTriple());
-      if (!(T.isAMDGPU() || T.isNVPTX()))
+      if (!AA::isGPU(M))
         return false;
       switch (AA::GPUAddressSpace(V->getType()->getPointerAddressSpace())) {
       case AA::GPUAddressSpace::Shared:
@@ -8662,7 +8661,8 @@ struct AAMemoryLocationImpl : public AAMemoryLocation {
   /// Determine the underlying locations kinds for \p Ptr, e.g., globals or
   /// arguments, and update the state and access map accordingly.
   void categorizePtrValue(Attributor &A, const Instruction &I, const Value &Ptr,
-                          AAMemoryLocation::StateType &State, bool &Changed);
+                          AAMemoryLocation::StateType &State, bool &Changed,
+                          unsigned AccessAS = 0);
 
   /// Used to allocate access sets.
   BumpPtrAllocator &Allocator;
@@ -8670,14 +8670,24 @@ struct AAMemoryLocationImpl : public AAMemoryLocation {
 
 void AAMemoryLocationImpl::categorizePtrValue(
     Attributor &A, const Instruction &I, const Value &Ptr,
-    AAMemoryLocation::StateType &State, bool &Changed) {
+    AAMemoryLocation::StateType &State, bool &Changed, unsigned AccessAS) {
   LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Categorize pointer locations for "
                     << Ptr << " ["
                     << getMemoryLocationsAsStr(State.getAssumed()) << "]\n");
 
   auto Pred = [&](Value &Obj) {
+    unsigned ObjectAS = Obj.getType()->getPointerAddressSpace();
     // TODO: recognize the TBAA used for constant accesses.
     MemoryLocationsKind MLK = NO_LOCATIONS;
+
+    // Filter accesses to constant (GPU) memory if we have an AS at the access
+    // site or the object is known to actually have the associated AS.
+    if ((AccessAS == (unsigned)AA::GPUAddressSpace::Constant ||
+         (ObjectAS == (unsigned)AA::GPUAddressSpace::Constant &&
+          isIdentifiedObject(&Obj))) &&
+        AA::isGPU(*I.getModule()))
+      return true;
+
     if (isa<UndefValue>(&Obj))
       return true;
     if (isa<Argument>(&Obj)) {
@@ -8701,8 +8711,8 @@ void AAMemoryLocationImpl::categorizePtrValue(
       else
         MLK = NO_GLOBAL_EXTERNAL_MEM;
     } else if (isa<ConstantPointerNull>(&Obj) &&
-               !NullPointerIsDefined(getAssociatedFunction(),
-                                     Ptr.getType()->getPointerAddressSpace())) {
+               (!NullPointerIsDefined(getAssociatedFunction(), AccessAS) ||
+                !NullPointerIsDefined(getAssociatedFunction(), ObjectAS))) {
       return true;
     } else if (isa<AllocaInst>(&Obj)) {
       MLK = NO_LOCAL_MEM;
@@ -8840,7 +8850,8 @@ AAMemoryLocationImpl::categorizeAccessedLocations(Attributor &A, Instruction &I,
     LLVM_DEBUG(
         dbgs() << "[AAMemoryLocation] Categorize memory access with pointer: "
                << I << " [" << *Ptr << "]\n");
-    categorizePtrValue(A, I, *Ptr, AccessedLocs, Changed);
+    categorizePtrValue(A, I, *Ptr, AccessedLocs, Changed,
+                       Ptr->getType()->getPointerAddressSpace());
     return AccessedLocs.getAssumed();
   }
 

diff  --git a/llvm/test/Transforms/Attributor/memory_locations_gpu.ll b/llvm/test/Transforms/Attributor/memory_locations_gpu.ll
new file mode 100644
index 0000000000000..7637991ff07a7
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/memory_locations_gpu.ll
@@ -0,0 +1,107 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "amdgcn-amd-amdhsa"
+
+ at G = external dso_local addrspace(4) global i32, align 4
+
+declare ptr @ptr() memory(none)
+declare ptr addrspace(4) @ptr_to_const() memory(none)
+declare ptr addrspace(3) @ptr_to_shared() memory(none)
+
+;.
+; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external dso_local addrspace(4) global i32, align 4
+;.
+; Should be memory(none)
+define i32 @test_const_as_global1() {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define {{[^@]+}}@test_const_as_global1
+; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:    [[L1:%.*]] = load i32, ptr addrspace(4) @G, align 4
+; CHECK-NEXT:    ret i32 [[L1]]
+;
+  %l1 = load i32, ptr addrspace(4) @G
+  ret i32 %l1
+}
+; Should be memory(none)
+define i32 @test_const_as_global2() {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define {{[^@]+}}@test_const_as_global2
+; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-NEXT:    [[L2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(4) @G to ptr), align 4
+; CHECK-NEXT:    ret i32 [[L2]]
+;
+  %l2 = load i32, ptr addrspacecast (ptr addrspace(4) @G to ptr)
+  ret i32 %l2
+}
+; Should be memory(none)
+define i32 @test_const_as_call1() {
+; CHECK: Function Attrs: nosync memory(read)
+; CHECK-LABEL: define {{[^@]+}}@test_const_as_call1
+; CHECK-SAME: () #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:    [[P1:%.*]] = call ptr addrspace(4) @ptr_to_const()
+; CHECK-NEXT:    [[C1:%.*]] = addrspacecast ptr addrspace(4) [[P1]] to ptr
+; CHECK-NEXT:    [[L1:%.*]] = load i32, ptr [[C1]], align 4
+; CHECK-NEXT:    ret i32 [[L1]]
+;
+  %p1 = call ptr addrspace(4) @ptr_to_const()
+  %c1 = addrspacecast ptr addrspace(4) %p1 to ptr
+  %l1 = load i32, ptr %c1
+  ret i32 %l1
+}
+; Should be memory(none)
+define i32 @test_const_as_call2() {
+; CHECK: Function Attrs: nosync memory(none)
+; CHECK-LABEL: define {{[^@]+}}@test_const_as_call2
+; CHECK-SAME: () #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT:    [[P2:%.*]] = call ptr @ptr()
+; CHECK-NEXT:    [[C2:%.*]] = addrspacecast ptr [[P2]] to ptr addrspace(4)
+; CHECK-NEXT:    [[L2:%.*]] = load i32, ptr addrspace(4) [[C2]], align 4
+; CHECK-NEXT:    ret i32 [[L2]]
+;
+  %p2 = call ptr @ptr()
+  %c2 = addrspacecast ptr %p2 to ptr addrspace(4)
+  %l2 = load i32, ptr addrspace(4) %c2
+  ret i32 %l2
+}
+
+; Should be memory(read)
+define i32 @test_shared_as_call1() {
+; CHECK: Function Attrs: nosync memory(read)
+; CHECK-LABEL: define {{[^@]+}}@test_shared_as_call1
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT:    [[P1:%.*]] = call ptr addrspace(3) @ptr_to_shared()
+; CHECK-NEXT:    [[C1:%.*]] = addrspacecast ptr addrspace(3) [[P1]] to ptr
+; CHECK-NEXT:    [[L1:%.*]] = load i32, ptr [[C1]], align 4
+; CHECK-NEXT:    ret i32 [[L1]]
+;
+  %p1 = call ptr addrspace(3) @ptr_to_shared()
+  %c1 = addrspacecast ptr addrspace(3) %p1 to ptr
+  %l1 = load i32, ptr %c1
+  ret i32 %l1
+}
+; Should be memory(read)
+define i32 @test_shared_as_call2() {
+; CHECK: Function Attrs: nosync memory(read)
+; CHECK-LABEL: define {{[^@]+}}@test_shared_as_call2
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT:    [[P2:%.*]] = call ptr @ptr()
+; CHECK-NEXT:    [[C2:%.*]] = addrspacecast ptr [[P2]] to ptr addrspace(3)
+; CHECK-NEXT:    [[L2:%.*]] = load i32, ptr addrspace(3) [[C2]], align 4
+; CHECK-NEXT:    ret i32 [[L2]]
+;
+  %p2 = call ptr @ptr()
+  %c2 = addrspacecast ptr %p2 to ptr addrspace(3)
+  %l2 = load i32, ptr addrspace(3) %c2
+  ret i32 %l2
+}
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { memory(none) }
+; CHECK: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR2]] = { nosync memory(read) }
+; CHECK: attributes #[[ATTR3]] = { nosync memory(none) }
+;.
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CGSCC: {{.*}}
+; TUNIT: {{.*}}


        


More information about the llvm-commits mailing list