[llvm] [Attributor] Take the address space from addrspacecast directly (PR #108258)

Shilei Tian via llvm-commits llvm-commits at lists.llvm.org
Sat Sep 14 11:05:51 PDT 2024


https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/108258

>From c06fe68c08c080eadcfb64624b84dd4243d03d31 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Thu, 12 Sep 2024 15:25:43 -0400
Subject: [PATCH 1/2] [Attributor] Use more appropriate approach to check flat
 address space

---
 llvm/include/llvm/Transforms/IPO/Attributor.h | 11 ++++++--
 llvm/lib/Transforms/IPO/Attributor.cpp        | 28 +++++++++++++++++++
 .../Transforms/IPO/AttributorAttributes.cpp   | 27 +++++++++++++-----
 .../Attributor/address_space_info.ll          |  2 +-
 .../test/Transforms/Attributor/nocapture-1.ll |  4 +--
 .../Transforms/Attributor/value-simplify.ll   |  3 +-
 6 files changed, 60 insertions(+), 15 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 921fe945539510..07ac7c15c692ec 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -1332,7 +1332,7 @@ struct InformationCache {
   bool stackIsAccessibleByOtherThreads() { return !targetIsGPU(); }
 
   /// Return true if the target is a GPU.
-  bool targetIsGPU() {
+  bool targetIsGPU() const {
     return TargetTriple.isAMDGPU() || TargetTriple.isNVPTX();
   }
 
@@ -1341,6 +1341,8 @@ struct InformationCache {
   const ArrayRef<Function *>
   getIndirectlyCallableFunctions(Attributor &A) const;
 
+  unsigned getFlatAddressSpace(const Function *F);
+
 private:
   struct FunctionInfo {
     ~FunctionInfo();
@@ -1383,6 +1385,9 @@ struct InformationCache {
   /// through the information cache interface *prior* to looking at them.
   void initializeInformationCache(const Function &F, FunctionInfo &FI);
 
+  /// Return the assumed flat address space.
+  unsigned getAssumedFlatAddressSpace() const;
+
   /// The datalayout used in the module.
   const DataLayout &DL;
 
@@ -6267,8 +6272,8 @@ struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> {
     return (AA->getIdAddr() == &ID);
   }
 
-  // No address space which indicates the associated value is dead.
-  static const uint32_t NoAddressSpace = ~0U;
+  // Invalid address space which indicates the associated value is dead.
+  static const uint32_t InvalidAddressSpace = ~0U;
 
   /// Unique ID (due to the unique address)
   static const char ID;
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 56d1133b25549a..b4197fdaf0c3ae 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MustExecute.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/AttributeMask.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Constant.h"
@@ -179,6 +180,12 @@ static cl::opt<bool> CloseWorldAssumption(
     "attributor-assume-closed-world", cl::Hidden,
     cl::desc("Should a closed world be assumed, or not. Default if not set."));
 
+static cl::opt<unsigned> AssumedFlatAddressSpace(
+    "attributor-assume-flat-address-space", cl::Hidden,
+    cl::desc(
+        "The assumed flat address space. This is mainly for test purpose."),
+    cl::init(~0U));
+
 /// Logic operators for the change status enum class.
 ///
 ///{
@@ -3294,6 +3301,27 @@ InformationCache::getIndirectlyCallableFunctions(Attributor &A) const {
   return IndirectlyCallableFunctions;
 }
 
+unsigned InformationCache::getFlatAddressSpace(const Function *F) {
+  if (!F)
+    return getAssumedFlatAddressSpace();
+  auto *TTI = getAnalysisResultForFunction<TargetIRAnalysis>(*F);
+  if (!TTI)
+    return getAssumedFlatAddressSpace();
+  return TTI->getFlatAddressSpace();
+}
+
+unsigned InformationCache::getAssumedFlatAddressSpace() const {
+  if (targetIsGPU()) {
+    if (TargetTriple.isAMDGPU() || TargetTriple.isNVPTX()) {
+      // We use 0 here directly instead of enumeration such that we don't need
+      // to include the target headers.
+      return 0;
+    }
+    llvm_unreachable("unknown GPU target");
+  }
+  return AssumedFlatAddressSpace;
+}
+
 void Attributor::recordDependence(const AbstractAttribute &FromAA,
                                   const AbstractAttribute &ToAA,
                                   DepClassTy DepClass) {
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 217c7cccb5775a..9c775e48f28195 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12571,8 +12571,20 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
   void initialize(Attributor &A) override {
     assert(getAssociatedType()->isPtrOrPtrVectorTy() &&
            "Associated value is not a pointer");
-    if (getAssociatedType()->getPointerAddressSpace())
+
+    unsigned FlatAS =
+        A.getInfoCache().getFlatAddressSpace(getAssociatedFunction());
+    if (FlatAS == InvalidAddressSpace) {
+      indicatePessimisticFixpoint();
+      return;
+    }
+
+    unsigned AS = getAssociatedType()->getPointerAddressSpace();
+    if (AS != FlatAS) {
+      [[maybe_unused]] bool R = takeAddressSpace(AS);
+      assert(R && "The take should happen");
       indicateOptimisticFixpoint();
+    }
   }
 
   ChangeStatus updateImpl(Attributor &A) override {
@@ -12594,12 +12606,13 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
 
   /// See AbstractAttribute::manifest(...).
   ChangeStatus manifest(Attributor &A) override {
-    Value *AssociatedValue = &getAssociatedValue();
-    Value *OriginalValue = peelAddrspacecast(AssociatedValue);
-    if (getAddressSpace() == NoAddressSpace ||
+    if (getAddressSpace() == InvalidAddressSpace ||
         getAddressSpace() == getAssociatedType()->getPointerAddressSpace())
       return ChangeStatus::UNCHANGED;
 
+    Value *AssociatedValue = &getAssociatedValue();
+    Value *OriginalValue = peelAddrspacecast(AssociatedValue);
+
     PointerType *NewPtrTy =
         PointerType::get(getAssociatedType()->getContext(), getAddressSpace());
     bool UseOriginalValue =
@@ -12646,17 +12659,17 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
     if (!isValidState())
       return "addrspace(<invalid>)";
     return "addrspace(" +
-           (AssumedAddressSpace == NoAddressSpace
+           (AssumedAddressSpace == InvalidAddressSpace
                 ? "none"
                 : std::to_string(AssumedAddressSpace)) +
            ")";
   }
 
 private:
-  uint32_t AssumedAddressSpace = NoAddressSpace;
+  uint32_t AssumedAddressSpace = InvalidAddressSpace;
 
   bool takeAddressSpace(uint32_t AS) {
-    if (AssumedAddressSpace == NoAddressSpace) {
+    if (AssumedAddressSpace == InvalidAddressSpace) {
       AssumedAddressSpace = AS;
       return true;
     }
diff --git a/llvm/test/Transforms/Attributor/address_space_info.ll b/llvm/test/Transforms/Attributor/address_space_info.ll
index 73dd93c55b819b..d0e8cc67736ca7 100644
--- a/llvm/test/Transforms/Attributor/address_space_info.ll
+++ b/llvm/test/Transforms/Attributor/address_space_info.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --prefix-filecheck-ir-name true
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -attributor-assume-flat-address-space=0 -S < %s | FileCheck %s --check-prefixes=CHECK
 
 @dst = dso_local addrspace(1) externally_initialized global i32 0, align 4
 @g1 = dso_local addrspace(1) externally_initialized global ptr null, align 4
diff --git a/llvm/test/Transforms/Attributor/nocapture-1.ll b/llvm/test/Transforms/Attributor/nocapture-1.ll
index 3401ddfdd7d758..de5f31e470edfc 100644
--- a/llvm/test/Transforms/Attributor/nocapture-1.ll
+++ b/llvm/test/Transforms/Attributor/nocapture-1.ll
@@ -257,7 +257,7 @@ define i32 @nc1_addrspace(ptr %q, ptr addrspace(1) %p, i1 %b) {
 ; TUNIT-NEXT:    [[TMP:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
 ; TUNIT-NEXT:    [[TMP2:%.*]] = select i1 [[B]], ptr [[TMP]], ptr [[Q]]
 ; TUNIT-NEXT:    [[VAL:%.*]] = load i32, ptr [[TMP2]], align 4
-; TUNIT-NEXT:    store i32 0, ptr addrspace(1) [[P]], align 4
+; TUNIT-NEXT:    store i32 0, ptr [[TMP]], align 4
 ; TUNIT-NEXT:    store ptr [[Q]], ptr @g, align 8
 ; TUNIT-NEXT:    ret i32 [[VAL]]
 ;
@@ -272,7 +272,7 @@ define i32 @nc1_addrspace(ptr %q, ptr addrspace(1) %p, i1 %b) {
 ; CGSCC-NEXT:    [[TMP:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
 ; CGSCC-NEXT:    [[TMP2:%.*]] = select i1 [[B]], ptr [[TMP]], ptr [[Q]]
 ; CGSCC-NEXT:    [[VAL:%.*]] = load i32, ptr [[TMP2]], align 4
-; CGSCC-NEXT:    store i32 0, ptr addrspace(1) [[P]], align 4
+; CGSCC-NEXT:    store i32 0, ptr [[TMP]], align 4
 ; CGSCC-NEXT:    store ptr [[Q]], ptr @g, align 8
 ; CGSCC-NEXT:    ret i32 [[VAL]]
 ;
diff --git a/llvm/test/Transforms/Attributor/value-simplify.ll b/llvm/test/Transforms/Attributor/value-simplify.ll
index 68f179c88116e4..a5789790cc92a1 100644
--- a/llvm/test/Transforms/Attributor/value-simplify.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify.ll
@@ -838,8 +838,7 @@ define void @user() {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
 ; TUNIT-LABEL: define {{[^@]+}}@user
 ; TUNIT-SAME: () #[[ATTR5]] {
-; TUNIT-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspacecast (ptr addrspace(3) @ConstAS3Ptr to ptr) to ptr addrspace(3)
-; TUNIT-NEXT:    store i32 0, ptr addrspace(3) [[TMP1]], align 4
+; TUNIT-NEXT:    store i32 0, ptr addrspacecast (ptr addrspace(3) @ConstAS3Ptr to ptr), align 4
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(write)

>From cc9440a76d8f754603fb0c4d23f60d684808681f Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Wed, 11 Sep 2024 12:23:32 -0400
Subject: [PATCH 2/2] [Attributor] Take the address space from addrspacecast
 directly

If the value to be analyzed is directly from addrspacecast, we take the source
address space directly. This is to improve the case where in
`AMDGPUPromoteKernelArgumentsPass`, the kernel argument is promoted by
insertting an addrspacecast directly from a generic pointer. However, during the
analysis, the underlying object will be the generic pointer, instead of the
addrspacecast, thus the inferred address space is the generic one, which is not
ideal.
---
 .../Transforms/IPO/AttributorAttributes.cpp   | 54 ++++++++++++++-----
 llvm/test/CodeGen/AMDGPU/aa-as-infer.ll       | 35 ++++++++++++
 llvm/test/CodeGen/AMDGPU/addrspacecast.ll     |  3 +-
 3 files changed, 77 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 9c775e48f28195..749c5ea0bfcf6c 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12589,15 +12589,37 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
 
   ChangeStatus updateImpl(Attributor &A) override {
     uint32_t OldAddressSpace = AssumedAddressSpace;
-    auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(getIRPosition(), this,
-                                                        DepClassTy::REQUIRED);
-    auto Pred = [&](Value &Obj) {
+
+    auto CheckAddressSpace = [&](Value &Obj) {
       if (isa<UndefValue>(&Obj))
         return true;
+      // If an argument in flat address space has addrspace cast uses, and those
+      // casts are same, then we take the dst addrspace.
+      if (auto *Arg = dyn_cast<Argument>(&Obj)) {
+        unsigned FlatAS =
+            A.getInfoCache().getFlatAddressSpace(Arg->getParent());
+        if (FlatAS != InvalidAddressSpace &&
+            Arg->getType()->getPointerAddressSpace() == FlatAS) {
+          unsigned CastAddrSpace = FlatAS;
+          for (auto *U : Arg->users()) {
+            auto *ASCI = dyn_cast<AddrSpaceCastInst>(U);
+            if (!ASCI)
+              continue;
+            if (CastAddrSpace != FlatAS &&
+                CastAddrSpace != ASCI->getDestAddressSpace())
+              return false;
+            CastAddrSpace = ASCI->getDestAddressSpace();
+          }
+          if (CastAddrSpace != FlatAS)
+            return takeAddressSpace(CastAddrSpace);
+        }
+      }
       return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
     };
 
-    if (!AUO->forallUnderlyingObjects(Pred))
+    auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(getIRPosition(), this,
+                                                        DepClassTy::REQUIRED);
+    if (!AUO->forallUnderlyingObjects(CheckAddressSpace))
       return indicatePessimisticFixpoint();
 
     return OldAddressSpace == AssumedAddressSpace ? ChangeStatus::UNCHANGED
@@ -12606,17 +12628,18 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
 
   /// See AbstractAttribute::manifest(...).
   ChangeStatus manifest(Attributor &A) override {
-    if (getAddressSpace() == InvalidAddressSpace ||
-        getAddressSpace() == getAssociatedType()->getPointerAddressSpace())
+    unsigned NewAS = getAddressSpace();
+
+    if (NewAS == InvalidAddressSpace ||
+        NewAS == getAssociatedType()->getPointerAddressSpace())
       return ChangeStatus::UNCHANGED;
 
     Value *AssociatedValue = &getAssociatedValue();
     Value *OriginalValue = peelAddrspacecast(AssociatedValue);
-
     PointerType *NewPtrTy =
-        PointerType::get(getAssociatedType()->getContext(), getAddressSpace());
+        PointerType::get(getAssociatedType()->getContext(), NewAS);
     bool UseOriginalValue =
-        OriginalValue->getType()->getPointerAddressSpace() == getAddressSpace();
+        OriginalValue->getType()->getPointerAddressSpace() == NewAS;
 
     bool Changed = false;
 
@@ -12677,11 +12700,16 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
   }
 
   static Value *peelAddrspacecast(Value *V) {
-    if (auto *I = dyn_cast<AddrSpaceCastInst>(V))
-      return peelAddrspacecast(I->getPointerOperand());
+    if (auto *I = dyn_cast<AddrSpaceCastInst>(V)) {
+      assert(I->getSrcAddressSpace() && "there should not be AS 0 -> AS X");
+      return I->getPointerOperand();
+    }
     if (auto *C = dyn_cast<ConstantExpr>(V))
-      if (C->getOpcode() == Instruction::AddrSpaceCast)
-        return peelAddrspacecast(C->getOperand(0));
+      if (C->getOpcode() == Instruction::AddrSpaceCast) {
+        assert(C->getOperand(0)->getType()->getPointerAddressSpace() &&
+               "there should not be AS 0 -> AS X");
+        return C->getOperand(0);
+      }
     return V;
   }
 };
diff --git a/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll b/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
index fdc5debb18915c..d1a6414fe49ae1 100644
--- a/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
+++ b/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
@@ -243,3 +243,38 @@ define void @foo(ptr addrspace(3) %val) {
   ret void
 }
 
+define void @kernel_argument_promotion_pattern_intra_procedure(ptr %p, i32 %val) {
+; CHECK-LABEL: define void @kernel_argument_promotion_pattern_intra_procedure(
+; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[P_CAST_0:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
+; CHECK-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[P_CAST_0]], align 4
+; CHECK-NEXT:    ret void
+;
+  %p.cast.0 = addrspacecast ptr %p to ptr addrspace(1)
+  %p.cast.1 = addrspacecast ptr addrspace(1) %p.cast.0 to ptr
+  store i32 %val, ptr %p.cast.1
+  ret void
+}
+
+define internal void @use_argument_after_promotion(ptr %p, i32 %val) {
+; CHECK-LABEL: define internal void @use_argument_after_promotion(
+; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
+; CHECK-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[TMP1]], align 4
+; CHECK-NEXT:    ret void
+;
+  store i32 %val, ptr %p
+  ret void
+}
+
+define void @kernel_argument_promotion_pattern_inter_procedure(ptr %p, i32 %val) {
+; CHECK-LABEL: define void @kernel_argument_promotion_pattern_inter_procedure(
+; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    call void @use_argument_after_promotion(ptr [[P]], i32 [[VAL]])
+; CHECK-NEXT:    ret void
+;
+  %p.cast.0 = addrspacecast ptr %p to ptr addrspace(1)
+  %p.cast.1 = addrspacecast ptr addrspace(1) %p.cast.0 to ptr
+  call void @use_argument_after_promotion(ptr %p.cast.1, i32 %val)
+  ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
index 7336543b41cbc8..930aaecfed19b8 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
@@ -214,7 +214,7 @@ define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) #0 {
   ret void
 }
 
-; HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast:
+;HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast:
 
 ; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
 ; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
@@ -258,7 +258,6 @@ define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 {
 }
 
 ; FIXME: Shouldn't need to enable queue ptr
-; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast:
 ; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
 ; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
 ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}



More information about the llvm-commits mailing list