[llvm] [Attributor] Take the address space from addrspacecast directly (PR #108258)

Shilei Tian via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 11 15:32:54 PDT 2024


https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/108258

>From 3211bfc0951b9163643862a0ed02b757087b724c Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Wed, 11 Sep 2024 12:23:32 -0400
Subject: [PATCH] [Attributor] Take the address space from addrspacecast
 directly

If the value to be analyzed is directly from addrspacecast, we take the source
address space directly. This is to improve the case where in
`AMDGPUPromoteKernelArgumentsPass`, the kernel argument is promoted by
insertting an addrspacecast directly from a generic pointer. However, during the
analysis, the underlying object will be the generic pointer, instead of the
addrspacecast, thus the inferred address space is the generic one, which is not
ideal.
---
 llvm/include/llvm/Transforms/IPO/Attributor.h |  4 +-
 .../Transforms/IPO/AttributorAttributes.cpp   | 83 ++++++++++++++-----
 llvm/test/CodeGen/AMDGPU/aa-as-infer.ll       | 35 ++++++++
 llvm/test/CodeGen/AMDGPU/addrspacecast.ll     | 15 +++-
 4 files changed, 112 insertions(+), 25 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 6ab63ba582c546..921fe945539510 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -6249,7 +6249,7 @@ struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> {
   /// Return the address space of the associated value. \p NoAddressSpace is
   /// returned if the associated value is dead. This functions is not supposed
   /// to be called if the AA is invalid.
-  virtual int32_t getAddressSpace() const = 0;
+  virtual uint32_t getAddressSpace() const = 0;
 
   /// Create an abstract attribute view for the position \p IRP.
   static AAAddressSpace &createForPosition(const IRPosition &IRP,
@@ -6268,7 +6268,7 @@ struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> {
   }
 
   // No address space which indicates the associated value is dead.
-  static const int32_t NoAddressSpace = -1;
+  static const uint32_t NoAddressSpace = ~0U;
 
   /// Unique ID (due to the unique address)
   static const char ID;
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index b73f5265f87874..31d45014ac00f4 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12562,7 +12562,7 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
   AAAddressSpaceImpl(const IRPosition &IRP, Attributor &A)
       : AAAddressSpace(IRP, A) {}
 
-  int32_t getAddressSpace() const override {
+  uint32_t getAddressSpace() const override {
     assert(isValidState() && "the AA is invalid");
     return AssumedAddressSpace;
   }
@@ -12571,17 +12571,59 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
   void initialize(Attributor &A) override {
     assert(getAssociatedType()->isPtrOrPtrVectorTy() &&
            "Associated value is not a pointer");
-    if (getAssociatedType()->getPointerAddressSpace())
+    // If the pointer already has non-generic address space, we assume it is the
+    // correct one.
+    if (getAssociatedType()->getPointerAddressSpace()) {
+      [[maybe_unused]] bool R =
+          takeAddressSpace(getAssociatedType()->getPointerAddressSpace());
+      assert(R && "the take should happen");
       indicateOptimisticFixpoint();
+      return;
+    }
+    // If the pointer is an addrspacecast, we assume the source address space is
+    // the correct one.
+    Value *V = &getAssociatedValue();
+    if (auto *ASC = dyn_cast<AddrSpaceCastInst>(V)) {
+      [[maybe_unused]] bool R = takeAddressSpace(ASC->getSrcAddressSpace());
+      assert(R && "the take should happen");
+      indicateOptimisticFixpoint();
+      return;
+    }
+    if (auto *C = dyn_cast<ConstantExpr>(V)) {
+      if (C->getOpcode() == Instruction::AddrSpaceCast) {
+        [[maybe_unused]] bool R = takeAddressSpace(
+            C->getOperand(0)->getType()->getPointerAddressSpace());
+        assert(R && "the take should happen");
+        indicateOptimisticFixpoint();
+        return;
+      }
+    }
   }
 
   ChangeStatus updateImpl(Attributor &A) override {
-    int32_t OldAddressSpace = AssumedAddressSpace;
+    uint32_t OldAddressSpace = AssumedAddressSpace;
     auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(getIRPosition(), this,
                                                         DepClassTy::REQUIRED);
     auto Pred = [&](Value &Obj) {
       if (isa<UndefValue>(&Obj))
         return true;
+      // If an argument in generic address space has addrspace cast uses, and
+      // those casts are same, then we take the dst addrspace.
+      if (auto *Arg = dyn_cast<Argument>(&Obj)) {
+        if (Arg->getType()->getPointerAddressSpace() == 0) {
+          unsigned CastAddrSpace = 0;
+          for (auto *U : Arg->users()) {
+            auto *ASCI = dyn_cast<AddrSpaceCastInst>(U);
+            if (!ASCI)
+              continue;
+            if (CastAddrSpace && CastAddrSpace != ASCI->getDestAddressSpace())
+              return false;
+            CastAddrSpace = ASCI->getDestAddressSpace();
+          }
+          if (CastAddrSpace)
+            return takeAddressSpace(CastAddrSpace);
+        }
+      }
       return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
     };
 
@@ -12594,19 +12636,17 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
 
   /// See AbstractAttribute::manifest(...).
   ChangeStatus manifest(Attributor &A) override {
-    Value *AssociatedValue = &getAssociatedValue();
-    Value *OriginalValue = peelAddrspacecast(AssociatedValue);
-    if (getAddressSpace() == NoAddressSpace ||
-        static_cast<uint32_t>(getAddressSpace()) ==
-            getAssociatedType()->getPointerAddressSpace())
+    unsigned NewAS = getAddressSpace();
+    if (NewAS == NoAddressSpace ||
+        NewAS == getAssociatedType()->getPointerAddressSpace())
       return ChangeStatus::UNCHANGED;
 
-    PointerType *NewPtrTy =
-        PointerType::get(getAssociatedType()->getContext(),
-                         static_cast<uint32_t>(getAddressSpace()));
+    Value *AssociatedValue = &getAssociatedValue();
+    Value *OriginalValue = peelAddrspacecast(AssociatedValue);
     bool UseOriginalValue =
-        OriginalValue->getType()->getPointerAddressSpace() ==
-        static_cast<uint32_t>(getAddressSpace());
+        OriginalValue->getType()->getPointerAddressSpace() == NewAS;
+    PointerType *NewPtrTy =
+        PointerType::get(getAssociatedType()->getContext(), NewAS);
 
     bool Changed = false;
 
@@ -12656,9 +12696,9 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
   }
 
 private:
-  int32_t AssumedAddressSpace = NoAddressSpace;
+  uint32_t AssumedAddressSpace = NoAddressSpace;
 
-  bool takeAddressSpace(int32_t AS) {
+  bool takeAddressSpace(uint32_t AS) {
     if (AssumedAddressSpace == NoAddressSpace) {
       AssumedAddressSpace = AS;
       return true;
@@ -12667,11 +12707,16 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
   }
 
   static Value *peelAddrspacecast(Value *V) {
-    if (auto *I = dyn_cast<AddrSpaceCastInst>(V))
-      return peelAddrspacecast(I->getPointerOperand());
+    if (auto *I = dyn_cast<AddrSpaceCastInst>(V)) {
+      assert(I->getSrcAddressSpace() && "there should not be AS 0 -> AS X");
+      return I->getPointerOperand();
+    }
     if (auto *C = dyn_cast<ConstantExpr>(V))
-      if (C->getOpcode() == Instruction::AddrSpaceCast)
-        return peelAddrspacecast(C->getOperand(0));
+      if (C->getOpcode() == Instruction::AddrSpaceCast) {
+        assert(C->getOperand(0)->getType()->getPointerAddressSpace() &&
+               "there should not be AS 0 -> AS X");
+        return C->getOperand(0);
+      }
     return V;
   }
 };
diff --git a/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll b/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
index fdc5debb18915c..87085eb0117a6f 100644
--- a/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
+++ b/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
@@ -243,3 +243,38 @@ define void @foo(ptr addrspace(3) %val) {
   ret void
 }
 
+define void @kernel_argument_promotion_pattern_intra_procedure(ptr %p, i32 %val) {
+; CHECK-LABEL: define void @kernel_argument_promotion_pattern_intra_procedure(
+; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[P_CAST_0:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
+; CHECK-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[P_CAST_0]], align 4
+; CHECK-NEXT:    ret void
+;
+  %p.cast.0 = addrspacecast ptr %p to ptr addrspace(1)
+  %p.cast.1 = addrspacecast ptr addrspace(1) %p.cast.0 to ptr
+  store i32 %val, ptr %p.cast.1
+  ret void
+}
+
+define internal void @use_kernel_argument_after_promotion(ptr %p, i32 %val) {
+; CHECK-LABEL: define internal void @use_kernel_argument_after_promotion(
+; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
+; CHECK-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[TMP1]], align 4
+; CHECK-NEXT:    ret void
+;
+  store i32 %val, ptr %p
+  ret void
+}
+
+define void @kernel_argument_promotion_pattern_inter_procedure(ptr %p, i32 %val) {
+; CHECK-LABEL: define void @kernel_argument_promotion_pattern_inter_procedure(
+; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    call void @use_kernel_argument_after_promotion(ptr [[P]], i32 [[VAL]])
+; CHECK-NEXT:    ret void
+;
+  %p.cast.0 = addrspacecast ptr %p to ptr addrspace(1)
+  %p.cast.1 = addrspacecast ptr addrspace(1) %p.cast.0 to ptr
+  call void @use_kernel_argument_after_promotion(ptr %p.cast.1, i32 %val)
+  ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
index 7336543b41cbc8..3deb882354a3bd 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
@@ -215,11 +215,14 @@ define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) #0 {
 }
 
 ; HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast:
+; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
+; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
+
+; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base
 
 ; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
-; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
 ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
-; HSA: flat_store_dword v[[[LO]]:[[HI]]], v[[K]]
+; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
 define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 {
   %cast = addrspacecast ptr addrspace(3) null to ptr
   store volatile i32 7, ptr %cast
@@ -259,10 +262,14 @@ define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 {
 
 ; FIXME: Shouldn't need to enable queue ptr
 ; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast:
+; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11
+; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
+
+; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_private_base
+
 ; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
-; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
 ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
-; HSA: flat_store_dword v[[[LO]]:[[HI]]], v[[K]]
+; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
 define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
   %cast = addrspacecast ptr addrspace(5) null to ptr
   store volatile i32 7, ptr %cast



More information about the llvm-commits mailing list