[llvm] [Attributor] Take the address space from addrspacecast directly (PR #108258)

Shilei Tian via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 8 13:06:43 PDT 2024


https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/108258

>From 242582bc0e6d9eedece40dd1d4e6d2d74732243b Mon Sep 17 00:00:00 2001
From: Shilei Tian <shilei.tian at amd.com>
Date: Tue, 8 Oct 2024 15:48:23 -0400
Subject: [PATCH] [Attributor] Take the address space from addrspacecast
 directly

If the value to be analyzed comes directly from an addrspacecast, we take the
source address space directly. This improves the case in
`AMDGPUPromoteKernelArgumentsPass` where the kernel argument is promoted by
inserting an addrspacecast directly from a generic pointer. During the
analysis, however, the underlying object is the generic pointer rather than the
addrspacecast, so the inferred address space is the generic one, which is not
ideal.
---
 .../Transforms/IPO/AttributorAttributes.cpp   | 60 ++++++++++---
 llvm/test/CodeGen/AMDGPU/aa-as-infer.ll       | 85 +++++++++++++++++++
 2 files changed, 131 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 45e2a0d0b93363..c3fa3e84c0ae18 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12583,16 +12583,37 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
   }
 
   ChangeStatus updateImpl(Attributor &A) override {
+    assert(A.getInfoCache().getFlatAddressSpace().has_value());
+    unsigned FlatAS = A.getInfoCache().getFlatAddressSpace().value();
     uint32_t OldAddressSpace = AssumedAddressSpace;
-    auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(getIRPosition(), this,
-                                                        DepClassTy::REQUIRED);
-    auto Pred = [&](Value &Obj) {
+
+    auto CheckAddressSpace = [&](Value &Obj) {
       if (isa<UndefValue>(&Obj))
         return true;
+      // If an argument in flat address space only has addrspace cast uses, and
+      // those casts are same, then we take the dst addrspace.
+      if (auto *Arg = dyn_cast<Argument>(&Obj)) {
+        if (Arg->getType()->getPointerAddressSpace() == FlatAS) {
+          unsigned CastAddrSpace = FlatAS;
+          for (auto *U : Arg->users()) {
+            auto *ASCI = dyn_cast<AddrSpaceCastInst>(U);
+            if (!ASCI)
+              return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
+            if (CastAddrSpace != FlatAS &&
+                CastAddrSpace != ASCI->getDestAddressSpace())
+              return false;
+            CastAddrSpace = ASCI->getDestAddressSpace();
+          }
+          if (CastAddrSpace != FlatAS)
+            return takeAddressSpace(CastAddrSpace);
+        }
+      }
       return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
     };
 
-    if (!AUO->forallUnderlyingObjects(Pred))
+    auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(getIRPosition(), this,
+                                                        DepClassTy::REQUIRED);
+    if (!AUO->forallUnderlyingObjects(CheckAddressSpace))
       return indicatePessimisticFixpoint();
 
     return OldAddressSpace == AssumedAddressSpace ? ChangeStatus::UNCHANGED
@@ -12601,17 +12622,21 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
 
   /// See AbstractAttribute::manifest(...).
   ChangeStatus manifest(Attributor &A) override {
-    if (getAddressSpace() == InvalidAddressSpace ||
-        getAddressSpace() == getAssociatedType()->getPointerAddressSpace())
+    unsigned NewAS = getAddressSpace();
+
+    if (NewAS == InvalidAddressSpace ||
+        NewAS == getAssociatedType()->getPointerAddressSpace())
       return ChangeStatus::UNCHANGED;
 
+    unsigned FlatAS = A.getInfoCache().getFlatAddressSpace().value();
+
     Value *AssociatedValue = &getAssociatedValue();
-    Value *OriginalValue = peelAddrspacecast(AssociatedValue);
+    Value *OriginalValue = peelAddrspacecast(AssociatedValue, FlatAS);
 
     PointerType *NewPtrTy =
-        PointerType::get(getAssociatedType()->getContext(), getAddressSpace());
+        PointerType::get(getAssociatedType()->getContext(), NewAS);
     bool UseOriginalValue =
-        OriginalValue->getType()->getPointerAddressSpace() == getAddressSpace();
+        OriginalValue->getType()->getPointerAddressSpace() == NewAS;
 
     bool Changed = false;
 
@@ -12671,12 +12696,19 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
     return AssumedAddressSpace == AS;
   }
 
-  static Value *peelAddrspacecast(Value *V) {
-    if (auto *I = dyn_cast<AddrSpaceCastInst>(V))
-      return peelAddrspacecast(I->getPointerOperand());
+  static Value *peelAddrspacecast(Value *V, unsigned FlatAS) {
+    if (auto *I = dyn_cast<AddrSpaceCastInst>(V)) {
+      assert(I->getSrcAddressSpace() != FlatAS &&
+             "there should not be flat AS -> non-flat AS");
+      return I->getPointerOperand();
+    }
     if (auto *C = dyn_cast<ConstantExpr>(V))
-      if (C->getOpcode() == Instruction::AddrSpaceCast)
-        return peelAddrspacecast(C->getOperand(0));
+      if (C->getOpcode() == Instruction::AddrSpaceCast) {
+        assert(C->getOperand(0)->getType()->getPointerAddressSpace() !=
+                   FlatAS &&
+               "there should not be flat AS -> non-flat AS X");
+        return C->getOperand(0);
+      }
     return V;
   }
 };
diff --git a/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll b/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
index fdc5debb18915c..39772609200ac0 100644
--- a/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
+++ b/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
@@ -243,3 +243,88 @@ define void @foo(ptr addrspace(3) %val) {
   ret void
 }
 
+define void @kernel_argument_promotion_pattern_intra_procedure(ptr %p, i32 %val) {
+; CHECK-LABEL: define void @kernel_argument_promotion_pattern_intra_procedure(
+; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[P_CAST_0:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
+; CHECK-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[P_CAST_0]], align 4
+; CHECK-NEXT:    ret void
+;
+  %p.cast.0 = addrspacecast ptr %p to ptr addrspace(1)
+  %p.cast.1 = addrspacecast ptr addrspace(1) %p.cast.0 to ptr
+  store i32 %val, ptr %p.cast.1
+  ret void
+}
+
+define internal void @use_argument_after_promotion(ptr %p, i32 %val) {
+; CHECK-LABEL: define internal void @use_argument_after_promotion(
+; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
+; CHECK-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[TMP1]], align 4
+; CHECK-NEXT:    ret void
+;
+  store i32 %val, ptr %p
+  ret void
+}
+
+define void @kernel_argument_promotion_pattern_inter_procedure(ptr %p, i32 %val) {
+; CHECK-LABEL: define void @kernel_argument_promotion_pattern_inter_procedure(
+; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    call void @use_argument_after_promotion(ptr [[P]], i32 [[VAL]])
+; CHECK-NEXT:    ret void
+;
+  %p.cast.0 = addrspacecast ptr %p to ptr addrspace(1)
+  %p.cast.1 = addrspacecast ptr addrspace(1) %p.cast.0 to ptr
+  call void @use_argument_after_promotion(ptr %p.cast.1, i32 %val)
+  ret void
+}
+
+define void @vec_kernel_argument_promotion_pattern_intra_procedure(<2 x ptr> %p, i32 %val) {
+; CHECK-LABEL: define void @vec_kernel_argument_promotion_pattern_intra_procedure(
+; CHECK-SAME: <2 x ptr> [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[P_CAST_0:%.*]] = addrspacecast <2 x ptr> [[P]] to <2 x ptr addrspace(1)>
+; CHECK-NEXT:    [[P_CAST_1:%.*]] = addrspacecast <2 x ptr addrspace(1)> [[P_CAST_0]] to <2 x ptr>
+; CHECK-NEXT:    [[P1:%.*]] = extractelement <2 x ptr> [[P_CAST_1]], i32 0
+; CHECK-NEXT:    [[P2:%.*]] = extractelement <2 x ptr> [[P_CAST_1]], i32 1
+; CHECK-NEXT:    store i32 [[VAL]], ptr [[P1]], align 4
+; CHECK-NEXT:    store i32 [[VAL]], ptr [[P2]], align 4
+; CHECK-NEXT:    ret void
+;
+  %p.cast.0 = addrspacecast <2 x ptr> %p to <2 x ptr addrspace(1)>
+  %p.cast.1 = addrspacecast <2 x ptr addrspace(1)> %p.cast.0 to <2 x ptr>
+  %p1 = extractelement <2 x ptr> %p.cast.1, i32 0
+  %p2 = extractelement <2 x ptr> %p.cast.1, i32 1
+  store i32 %val, ptr %p1
+  store i32 %val, ptr %p2
+  ret void
+}
+
+define internal void @use_vec_argument_after_promotion(<2 x ptr> %p, i32 %val) {
+; CHECK-LABEL: define internal void @use_vec_argument_after_promotion(
+; CHECK-SAME: <2 x ptr> [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[P1:%.*]] = extractelement <2 x ptr> [[P]], i32 0
+; CHECK-NEXT:    [[P2:%.*]] = extractelement <2 x ptr> [[P]], i32 1
+; CHECK-NEXT:    store i32 [[VAL]], ptr [[P1]], align 4
+; CHECK-NEXT:    store i32 [[VAL]], ptr [[P2]], align 4
+; CHECK-NEXT:    ret void
+;
+  %p1 = extractelement <2 x ptr> %p, i32 0
+  %p2 = extractelement <2 x ptr> %p, i32 1
+  store i32 %val, ptr %p1
+  store i32 %val, ptr %p2
+  ret void
+}
+
+define void @vec_kernel_argument_promotion_pattern_inter_procedure(<2 x ptr> %p, i32 %val) {
+; CHECK-LABEL: define void @vec_kernel_argument_promotion_pattern_inter_procedure(
+; CHECK-SAME: <2 x ptr> [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[P_CAST_0:%.*]] = addrspacecast <2 x ptr> [[P]] to <2 x ptr addrspace(1)>
+; CHECK-NEXT:    [[P_CAST_1:%.*]] = addrspacecast <2 x ptr addrspace(1)> [[P_CAST_0]] to <2 x ptr>
+; CHECK-NEXT:    call void @use_vec_argument_after_promotion(<2 x ptr> [[P_CAST_1]], i32 [[VAL]])
+; CHECK-NEXT:    ret void
+;
+  %p.cast.0 = addrspacecast <2 x ptr> %p to <2 x ptr addrspace(1)>
+  %p.cast.1 = addrspacecast <2 x ptr addrspace(1)> %p.cast.0 to <2 x ptr>
+  call void @use_vec_argument_after_promotion(<2 x ptr> %p.cast.1, i32 %val)
+  ret void
+}



More information about the llvm-commits mailing list