[llvm] Attributor: Infer noalias.addrspace metadata for memory instructions (PR #136553)

via llvm-commits llvm-commits at lists.llvm.org
Tue May 6 22:41:52 PDT 2025


https://github.com/Shoreshen updated https://github.com/llvm/llvm-project/pull/136553

From c81c7bfd5bd3db4b2373c4168eaa5ecbec835d08 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Mon, 21 Apr 2025 17:37:48 +0800
Subject: [PATCH 1/6] add noalias.addrspace metadata for store and load

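The !noalias.addrspace metadata encodes a list of half-open [lo, hi) ranges
of address spaces that the annotated memory access is known not to touch.
A minimal sketch of how such a node is attached, assuming an existing
LLVMContext Ctx and LoadInst *LI (illustrative names, not part of this
patch):

  MDBuilder MDB(Ctx);
  // Mark the load as not accessing address spaces 3 and 4, i.e. [3, 5).
  MDNode *Ranges = MDB.createRange(APInt(32, 3), APInt(32, 5));
  LI->setMetadata(LLVMContext::MD_noalias_addrspace, Ranges);

The manifest() below builds the same kind of node, emitting one range per
excluded address space and merging them with MDNode::getMostGenericRange.
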
---
 llvm/include/llvm/Transforms/IPO/Attributor.h |  40 +++++
 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp   |  14 +-
 .../Transforms/IPO/AttributorAttributes.cpp   | 169 ++++++++++++++++++
 .../AMDGPU/attributor-noalias-addrspace.ll    |  87 +++++++++
 4 files changed, 307 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index f82e169da00e3..875305072a429 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -6336,6 +6336,46 @@ struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> {
   static const uint32_t InvalidAddressSpace = ~0U;
 };
 
+/// An abstract interface for no-alias address space information.
+struct AANoAliasAddrSpace
+    : public StateWrapper<BitIntegerState<uint32_t>, AbstractAttribute> {
+  using Base = StateWrapper<BitIntegerState<uint32_t>, AbstractAttribute>;
+  AANoAliasAddrSpace(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+
+  /// See AbstractAttribute::isValidIRPositionForInit
+  static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
+    if (!IRP.getAssociatedType()->isPtrOrPtrVectorTy())
+      return false;
+    return AbstractAttribute::isValidIRPositionForInit(A, IRP);
+  }
+
+  /// See AbstractAttribute::requiresCallersForArgOrFunction
+  static bool requiresCallersForArgOrFunction() { return true; }
+
+  /// Create an abstract attribute view for the position \p IRP.
+  static AANoAliasAddrSpace &createForPosition(const IRPosition &IRP,
+                                               Attributor &A);
+  /// See AbstractAttribute::getName()
+  const std::string getName() const override { return "AANoAliasAddrSpace"; }
+
+  /// See AbstractAttribute::getIdAddr()
+  const char *getIdAddr() const override { return &ID; }
+
+  /// This function should return true if the type of the \p AA is
+  /// AANoAliasAddrSpace
+  static bool classof(const AbstractAttribute *AA) {
+    return (AA->getIdAddr() == &ID);
+  }
+
+  void setMask(uint32_t mask) {
+    removeKnownBits(~mask);
+    removeAssumedBits(~mask);
+  }
+
+  /// Unique ID (due to the unique address)
+  static const char ID;
+};
+
 struct AAAllocationInfo : public StateWrapper<BooleanState, AbstractAttribute> {
   AAAllocationInfo(const IRPosition &IRP, Attributor &A)
       : StateWrapper<BooleanState, AbstractAttribute>(IRP) {}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 87fa845f3cff7..0abd641030323 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1349,8 +1349,8 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
        &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
        &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
        &AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
-       &AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
-       &AAInstanceInfo::ID});
+       &AAUnderlyingObjects::ID, &AANoAliasAddrSpace::ID, &AAAddressSpace::ID,
+       &AAIndirectCallInfo::ID, &AAInstanceInfo::ID});
 
   AttributorConfig AC(CGUpdater);
   AC.IsClosedWorldModule = Options.IsClosedWorld;
@@ -1372,7 +1372,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
   LLVM_DEBUG(dbgs() << "[AMDGPUAttributor] Module " << M.getName() << " is "
                     << (AC.IsClosedWorldModule ? "" : "not ")
                     << "assumed to be a closed world.\n");
-
+  uint32_t AddrSpaceMask = (1 << AMDGPUAS::MAX_AMDGPU_ADDRESS + 1) - 1;
   for (auto *F : Functions) {
     A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(*F));
     A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(*F));
@@ -1390,9 +1390,17 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
       if (auto *LI = dyn_cast<LoadInst>(&I)) {
         A.getOrCreateAAFor<AAAddressSpace>(
             IRPosition::value(*LI->getPointerOperand()));
+        const_cast<AANoAliasAddrSpace *>(
+            A.getOrCreateAAFor<AANoAliasAddrSpace>(
+                IRPosition::value(*LI->getPointerOperand())))
+            ->setMask(AddrSpaceMask);
       } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
         A.getOrCreateAAFor<AAAddressSpace>(
             IRPosition::value(*SI->getPointerOperand()));
+        const_cast<AANoAliasAddrSpace *>(
+            A.getOrCreateAAFor<AANoAliasAddrSpace>(
+                IRPosition::value(*SI->getPointerOperand())))
+            ->setMask(AddrSpaceMask);
       } else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I)) {
         A.getOrCreateAAFor<AAAddressSpace>(
             IRPosition::value(*RMW->getPointerOperand()));
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index ac56df3823e20..ea59701c120e4 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -191,6 +191,7 @@ PIPE_OPERATOR(AAPointerInfo)
 PIPE_OPERATOR(AAAssumptionInfo)
 PIPE_OPERATOR(AAUnderlyingObjects)
 PIPE_OPERATOR(AAAddressSpace)
+PIPE_OPERATOR(AANoAliasAddrSpace)
 PIPE_OPERATOR(AAAllocationInfo)
 PIPE_OPERATOR(AAIndirectCallInfo)
 PIPE_OPERATOR(AAGlobalValueInfo)
@@ -12784,6 +12785,172 @@ struct AAAddressSpaceCallSiteArgument final : AAAddressSpaceImpl {
 };
 } // namespace
 
+/// ------------------------ No Alias Address Space ---------------------------
+namespace {
+struct AANoAliasAddrSpaceImpl : public AANoAliasAddrSpace {
+  AANoAliasAddrSpaceImpl(const IRPosition &IRP, Attributor &A)
+      : AANoAliasAddrSpace(IRP, A) {}
+
+  void initialize(Attributor &A) override {
+    assert(getAssociatedType()->isPtrOrPtrVectorTy() &&
+           "Associated value is not a pointer");
+
+    if (!A.getInfoCache().getFlatAddressSpace().has_value()) {
+      indicatePessimisticFixpoint();
+      return;
+    }
+
+    unsigned FlatAS = A.getInfoCache().getFlatAddressSpace().value();
+    unsigned AS = getAssociatedType()->getPointerAddressSpace();
+    if (AS != FlatAS) {
+      removeAssumedBits(1 << AS);
+      indicateOptimisticFixpoint();
+    }
+  }
+
+  ChangeStatus updateImpl(Attributor &A) override {
+    unsigned FlatAS = A.getInfoCache().getFlatAddressSpace().value();
+    uint32_t OrigAssumed = getAssumed();
+
+    auto CheckAddressSpace = [&](Value &Obj) {
+      if (isa<UndefValue>(&Obj))
+        return true;
+      // Handle an argument in the flat address space that only has
+      // addrspacecast uses.
+      if (auto *Arg = dyn_cast<Argument>(&Obj)) {
+        if (Arg->getType()->getPointerAddressSpace() == FlatAS) {
+          for (auto *U : Arg->users()) {
+            auto *ASCI = dyn_cast<AddrSpaceCastInst>(U);
+            if (!ASCI)
+              return false;
+            if (ASCI->getDestAddressSpace() == FlatAS)
+              return false;
+            removeAssumedBits(1 << ASCI->getDestAddressSpace());
+          }
+        }
+      }
+      removeAssumedBits(1 << Obj.getType()->getPointerAddressSpace());
+      return true;
+    };
+
+    auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(getIRPosition(), this,
+                                                        DepClassTy::REQUIRED);
+    if (!AUO->forallUnderlyingObjects(CheckAddressSpace))
+      return indicatePessimisticFixpoint();
+
+    return OrigAssumed == getAssumed() ? ChangeStatus::UNCHANGED
+                                       : ChangeStatus::CHANGED;
+  }
+
+  /// See AbstractAttribute::manifest(...).
+  ChangeStatus manifest(Attributor &A) override {
+    if (!A.getInfoCache().getFlatAddressSpace().has_value())
+      return ChangeStatus::UNCHANGED;
+
+    unsigned FlatAS = A.getInfoCache().getFlatAddressSpace().value();
+    unsigned AS = getAssociatedType()->getPointerAddressSpace();
+    if (AS != FlatAS)
+      return ChangeStatus::UNCHANGED;
+
+    LLVMContext &Ctx = getAssociatedValue().getContext();
+    llvm::MDNode *NoAliasASNode = nullptr;
+    MDBuilder MDB(Ctx);
+    for (unsigned int i = 1; i < 32; i++) {
+      if (i != FlatAS && isAssumed(1 << i)) {
+        if (NoAliasASNode == nullptr) {
+          NoAliasASNode = MDB.createRange(APInt(32, i), APInt(32, i + 1));
+        } else {
+          llvm::MDNode *ASRange =
+              MDB.createRange(APInt(32, i), APInt(32, i + 1));
+          NoAliasASNode = MDNode::getMostGenericRange(NoAliasASNode, ASRange);
+        }
+      }
+    }
+
+    if (!NoAliasASNode || NoAliasASNode->getNumOperands() == 0)
+      return ChangeStatus::UNCHANGED;
+
+    Value *AssociatedValue = &getAssociatedValue();
+    bool Changed = false;
+
+    auto Pred = [&](const Use &U, bool &) {
+      if (U.get() != AssociatedValue)
+        return true;
+      auto *Inst = dyn_cast<Instruction>(U.getUser());
+      if (!Inst)
+        return true;
+      if (!A.isRunOn(Inst->getFunction()))
+        return true;
+      if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst)) {
+        Inst->setMetadata(LLVMContext::MD_noalias_addrspace, NoAliasASNode);
+        Changed = true;
+      }
+      return true;
+    };
+    (void)A.checkForAllUses(Pred, *this, getAssociatedValue(),
+                            /* CheckBBLivenessOnly */ true);
+    return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
+  }
+
+  /// See AbstractAttribute::getAsStr().
+  const std::string getAsStr(Attributor *A) const override {
+    if (!isValidState())
+      return "noaliasaddrspace(<invalid>)";
+    std::string Str;
+    raw_string_ostream OS(Str);
+    OS << "noaliasaddrspace(";
+    for (unsigned int i = 1; i < 32; i++)
+      if (isAssumed(1 << i))
+        OS << ' ' << i;
+    OS << " )";
+    return OS.str();
+  }
+};
+
+struct AANoAliasAddrSpaceFloating final : AANoAliasAddrSpaceImpl {
+  AANoAliasAddrSpaceFloating(const IRPosition &IRP, Attributor &A)
+      : AANoAliasAddrSpaceImpl(IRP, A) {}
+
+  void trackStatistics() const override {
+    STATS_DECLTRACK_FLOATING_ATTR(noaliasaddrspace);
+  }
+};
+
+struct AANoAliasAddrSpaceReturned final : AANoAliasAddrSpaceImpl {
+  AANoAliasAddrSpaceReturned(const IRPosition &IRP, Attributor &A)
+      : AANoAliasAddrSpaceImpl(IRP, A) {}
+
+  void trackStatistics() const override {
+    STATS_DECLTRACK_FNRET_ATTR(noaliasaddrspace);
+  }
+};
+
+struct AANoAliasAddrSpaceCallSiteReturned final : AANoAliasAddrSpaceImpl {
+  AANoAliasAddrSpaceCallSiteReturned(const IRPosition &IRP, Attributor &A)
+      : AANoAliasAddrSpaceImpl(IRP, A) {}
+
+  void trackStatistics() const override {
+    STATS_DECLTRACK_CSRET_ATTR(noaliasaddrspace);
+  }
+};
+
+struct AANoAliasAddrSpaceArgument final : AANoAliasAddrSpaceImpl {
+  AANoAliasAddrSpaceArgument(const IRPosition &IRP, Attributor &A)
+      : AANoAliasAddrSpaceImpl(IRP, A) {}
+
+  void trackStatistics() const override {
+    STATS_DECLTRACK_ARG_ATTR(noaliasaddrspace);
+  }
+};
+
+struct AANoAliasAddrSpaceCallSiteArgument final : AANoAliasAddrSpaceImpl {
+  AANoAliasAddrSpaceCallSiteArgument(const IRPosition &IRP, Attributor &A)
+      : AANoAliasAddrSpaceImpl(IRP, A) {}
+
+  void trackStatistics() const override {
+    STATS_DECLTRACK_CSARG_ATTR(noaliasaddrspace);
+  }
+};
+} // namespace
 /// ----------- Allocation Info ----------
 namespace {
 struct AAAllocationInfoImpl : public AAAllocationInfo {
@@ -13037,6 +13204,7 @@ const char AAPointerInfo::ID = 0;
 const char AAAssumptionInfo::ID = 0;
 const char AAUnderlyingObjects::ID = 0;
 const char AAAddressSpace::ID = 0;
+const char AANoAliasAddrSpace::ID = 0;
 const char AAAllocationInfo::ID = 0;
 const char AAIndirectCallInfo::ID = 0;
 const char AAGlobalValueInfo::ID = 0;
@@ -13171,6 +13339,7 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFPClass)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPointerInfo)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAddressSpace)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAliasAddrSpace)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAllocationInfo)
 
 CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueSimplify)
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll b/llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll
new file mode 100644
index 0000000000000..55d31784b7cdd
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck %s
+
+define amdgpu_kernel void @no_alias_addr_space_select(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, ptr addrspace(5) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_select(
+; CHECK-SAME: ptr addrspace(1) [[GPTR:%.*]], ptr addrspace(3) [[LPTR:%.*]], ptr addrspace(5) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[A:%.*]] = addrspacecast ptr addrspace(1) [[GPTR]] to ptr
+; CHECK-NEXT:    [[B:%.*]] = addrspacecast ptr addrspace(3) [[LPTR]] to ptr
+; CHECK-NEXT:    [[C:%.*]] = addrspacecast ptr addrspace(5) [[SPTR]] to ptr
+; CHECK-NEXT:    [[ADD_A:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[OFFSET]]
+; CHECK-NEXT:    [[PTR:%.*]] = select i1 [[COND1]], ptr [[ADD_A]], ptr [[B]]
+; CHECK-NEXT:    [[PTR2:%.*]] = select i1 [[COND2]], ptr [[PTR]], ptr [[C]]
+; CHECK-NEXT:    store i32 [[VAL]], ptr [[PTR2]], align 4, !noalias.addrspace [[META0:![0-9]+]]
+; CHECK-NEXT:    ret void
+;
+  %a = addrspacecast ptr addrspace(1) %gptr to ptr
+  %b = addrspacecast ptr addrspace(3) %lptr to ptr
+  %c = addrspacecast ptr addrspace(5) %sptr to ptr
+  %add_a = getelementptr inbounds i8, ptr %a, i32 %offset
+  %ptr = select i1 %cond1, ptr %add_a, ptr %b
+  %ptr2 = select i1 %cond2, ptr %ptr, ptr %c
+  store i32 %val, ptr %ptr2
+  ret void
+}
+
+define amdgpu_kernel void @no_alias_addr_space_arg(ptr %gptr, i32 %val, i1 %cond) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_arg(
+; CHECK-SAME: ptr [[GPTR:%.*]], i32 [[VAL:%.*]], i1 [[COND:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    store i32 [[VAL]], ptr [[GPTR]], align 4, !noalias.addrspace [[META1:![0-9]+]]
+; CHECK-NEXT:    ret void
+;
+  %a = addrspacecast ptr %gptr to ptr addrspace(5)
+  %b = addrspacecast ptr %gptr to ptr addrspace(7)
+  %ptr_a = addrspacecast ptr addrspace(5) %a to ptr
+  %ptr_b = addrspacecast ptr addrspace(7) %b to ptr
+  %ptr = select i1 %cond, ptr %ptr_a, ptr %ptr_b
+  store i32 %val, ptr %ptr
+  ret void
+}
+
+define amdgpu_kernel void @no_alias_addr_space_branch(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, ptr addrspace(5) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_branch(
+; CHECK-SAME: ptr addrspace(1) [[GPTR:%.*]], ptr addrspace(3) [[LPTR:%.*]], ptr addrspace(5) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    br i1 [[COND1]], label %[[BB_1_TRUE:.*]], label %[[BB_1_FALSE:.*]]
+; CHECK:       [[BB_1_TRUE]]:
+; CHECK-NEXT:    [[A:%.*]] = addrspacecast ptr addrspace(1) [[GPTR]] to ptr
+; CHECK-NEXT:    br label %[[BB_1_END:.*]]
+; CHECK:       [[BB_1_FALSE]]:
+; CHECK-NEXT:    [[B:%.*]] = addrspacecast ptr addrspace(3) [[LPTR]] to ptr
+; CHECK-NEXT:    br label %[[BB_1_END]]
+; CHECK:       [[BB_1_END]]:
+; CHECK-NEXT:    [[PTR1:%.*]] = phi ptr [ [[A]], %[[BB_1_TRUE]] ], [ [[B]], %[[BB_1_FALSE]] ]
+; CHECK-NEXT:    br i1 [[COND2]], label %[[BB_2_TRUE:.*]], label %[[BB_2_END:.*]]
+; CHECK:       [[BB_2_TRUE]]:
+; CHECK-NEXT:    [[C:%.*]] = addrspacecast ptr addrspace(5) [[SPTR]] to ptr
+; CHECK-NEXT:    br label %[[BB_2_END]]
+; CHECK:       [[BB_2_END]]:
+; CHECK-NEXT:    [[PTR2:%.*]] = phi ptr [ [[PTR1]], %[[BB_1_END]] ], [ [[C]], %[[BB_2_TRUE]] ]
+; CHECK-NEXT:    store i32 [[VAL]], ptr [[PTR2]], align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    ret void
+;
+  br i1 %cond1, label %bb.1.true, label %bb.1.false
+bb.1.true:
+  %a = addrspacecast ptr addrspace(1) %gptr to ptr
+  br label %bb.1.end
+
+bb.1.false:
+  %b = addrspacecast ptr addrspace(3) %lptr to ptr
+  br label %bb.1.end
+
+bb.1.end:
+  %ptr1 = phi ptr [ %a, %bb.1.true ], [ %b, %bb.1.false ]
+  br i1 %cond2, label %bb.2.true, label %bb.2.end
+
+bb.2.true:
+  %c = addrspacecast ptr addrspace(5) %sptr to ptr
+  br label %bb.2.end
+
+bb.2.end:
+  %ptr2 = phi ptr [ %ptr1, %bb.1.end ], [ %c, %bb.2.true ]
+  store i32 %val, ptr %ptr2
+  ret void
+}
+;.
+; CHECK: [[META0]] = !{i32 2, i32 3, i32 4, i32 5, i32 6, i32 10}
+; CHECK: [[META1]] = !{i32 1, i32 5, i32 6, i32 7, i32 8, i32 10}
+;.

From e895abd657b01b680cb84a65b8a1a05cb95920de Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 22 Apr 2025 11:02:52 +0800
Subject: [PATCH 2/6] fix comments

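One change below only adds parentheses for readability: '+' binds tighter
than '<<', so both spellings compute (1 << (MAX_AMDGPU_ADDRESS + 1)) - 1.
A worked sketch of the mask, assuming AMDGPUAS::MAX_AMDGPU_ADDRESS is 9
(its value at the time of writing):

  // One bit per address space 0..9: (1u << 10) - 1 == 0x3FF.
  uint32_t AddrSpaceMask = (1u << (9 + 1)) - 1;
  static_assert((1u << 10) - 1 == 0x3FFu, "10 low bits set");
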
---
 llvm/include/llvm/Transforms/IPO/Attributor.h |   6 +-
 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp   |  30 ++-
 .../Transforms/IPO/AttributorAttributes.cpp   |   5 +-
 .../AMDGPU/attributor-noalias-addrspace.ll    | 247 ++++++++++++++++--
 4 files changed, 252 insertions(+), 36 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 875305072a429..67bd1925aadf3 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -6367,9 +6367,9 @@ struct AANoAliasAddrSpace
     return (AA->getIdAddr() == &ID);
   }
 
-  void setMask(uint32_t mask) {
-    removeKnownBits(~mask);
-    removeAssumedBits(~mask);
+  void setMask(uint32_t Mask) {
+    removeKnownBits(~Mask);
+    removeAssumedBits(~Mask);
   }
 
   /// Unique ID (due to the unique address)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 0abd641030323..5bd104bfc99cb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1372,7 +1372,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
   LLVM_DEBUG(dbgs() << "[AMDGPUAttributor] Module " << M.getName() << " is "
                     << (AC.IsClosedWorldModule ? "" : "not ")
                     << "assumed to be a closed world.\n");
-  uint32_t AddrSpaceMask = (1 << AMDGPUAS::MAX_AMDGPU_ADDRESS + 1) - 1;
+  uint32_t AddrSpaceMask = (1 << (AMDGPUAS::MAX_AMDGPU_ADDRESS + 1)) - 1;
   for (auto *F : Functions) {
     A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(*F));
     A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(*F));
@@ -1388,25 +1388,29 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
 
     for (auto &I : instructions(F)) {
       if (auto *LI = dyn_cast<LoadInst>(&I)) {
-        A.getOrCreateAAFor<AAAddressSpace>(
-            IRPosition::value(*LI->getPointerOperand()));
+        Value &Ptr = *(LI->getPointerOperand());
+        A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(Ptr));
         const_cast<AANoAliasAddrSpace *>(
-            A.getOrCreateAAFor<AANoAliasAddrSpace>(
-                IRPosition::value(*LI->getPointerOperand())))
+            A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(Ptr)))
             ->setMask(AddrSpaceMask);
       } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
-        A.getOrCreateAAFor<AAAddressSpace>(
-            IRPosition::value(*SI->getPointerOperand()));
+        Value &Ptr = *(SI->getPointerOperand());
+        A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(Ptr));
         const_cast<AANoAliasAddrSpace *>(
-            A.getOrCreateAAFor<AANoAliasAddrSpace>(
-                IRPosition::value(*SI->getPointerOperand())))
+            A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(Ptr)))
             ->setMask(AddrSpaceMask);
       } else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I)) {
-        A.getOrCreateAAFor<AAAddressSpace>(
-            IRPosition::value(*RMW->getPointerOperand()));
+        Value &Ptr = *(RMW->getPointerOperand());
+        A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(Ptr));
+        const_cast<AANoAliasAddrSpace *>(
+            A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(Ptr)))
+            ->setMask(AddrSpaceMask);
       } else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I)) {
-        A.getOrCreateAAFor<AAAddressSpace>(
-            IRPosition::value(*CmpX->getPointerOperand()));
+        Value &Ptr = *(CmpX->getPointerOperand());
+        A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(Ptr));
+        const_cast<AANoAliasAddrSpace *>(
+            A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(Ptr)))
+            ->setMask(AddrSpaceMask);
       }
     }
   }
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index ea59701c120e4..9d739c7446319 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12813,7 +12813,7 @@ struct AANoAliasAddrSpaceImpl : public AANoAliasAddrSpace {
     uint32_t OrigAssumed = getAssumed();
 
     auto CheckAddressSpace = [&](Value &Obj) {
-      if (isa<UndefValue>(&Obj))
+      if (isa<UndefValue>(&Obj) || isa<PoisonValue>(&Obj))
         return true;
       // Handle an argument in the flat address space that only has
       // addrspacecast uses.
       if (auto *Arg = dyn_cast<Argument>(&Obj)) {
@@ -12880,7 +12880,8 @@ struct AANoAliasAddrSpaceImpl : public AANoAliasAddrSpace {
         return true;
       if (!A.isRunOn(Inst->getFunction()))
         return true;
-      if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst)) {
+      if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst) ||
+          isa<AtomicCmpXchgInst>(Inst) || isa<AtomicRMWInst>(Inst)) {
         Inst->setMetadata(LLVMContext::MD_noalias_addrspace, NoAliasASNode);
         Changed = true;
       }
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll b/llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll
index 55d31784b7cdd..759004ffcc1fd 100644
--- a/llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll
+++ b/llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll
@@ -1,19 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck %s
 
-define amdgpu_kernel void @no_alias_addr_space_select(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, ptr addrspace(5) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+ at gptr = protected addrspace(1) externally_initialized global i32 0, align 4
+
+define amdgpu_kernel void @no_alias_addr_space_select(ptr addrspace(5) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
 ; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_select(
-; CHECK-SAME: ptr addrspace(1) [[GPTR:%.*]], ptr addrspace(3) [[LPTR:%.*]], ptr addrspace(5) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:    [[A:%.*]] = addrspacecast ptr addrspace(1) [[GPTR]] to ptr
+; CHECK-SAME: ptr addrspace(5) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[LPTR:%.*]] = alloca i32, align 4, addrspace(3)
 ; CHECK-NEXT:    [[B:%.*]] = addrspacecast ptr addrspace(3) [[LPTR]] to ptr
 ; CHECK-NEXT:    [[C:%.*]] = addrspacecast ptr addrspace(5) [[SPTR]] to ptr
-; CHECK-NEXT:    [[ADD_A:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[OFFSET]]
+; CHECK-NEXT:    [[ADD_A:%.*]] = getelementptr inbounds i8, ptr addrspacecast (ptr addrspace(1) @gptr to ptr), i32 [[OFFSET]]
 ; CHECK-NEXT:    [[PTR:%.*]] = select i1 [[COND1]], ptr [[ADD_A]], ptr [[B]]
 ; CHECK-NEXT:    [[PTR2:%.*]] = select i1 [[COND2]], ptr [[PTR]], ptr [[C]]
 ; CHECK-NEXT:    store i32 [[VAL]], ptr [[PTR2]], align 4, !noalias.addrspace [[META0:![0-9]+]]
 ; CHECK-NEXT:    ret void
 ;
-  %a = addrspacecast ptr addrspace(1) %gptr to ptr
+  %lptr = alloca i32, align 4, addrspace(3)
+  %a = addrspacecast ptr addrspace(1) @gptr to ptr
   %b = addrspacecast ptr addrspace(3) %lptr to ptr
   %c = addrspacecast ptr addrspace(5) %sptr to ptr
   %add_a = getelementptr inbounds i8, ptr %a, i32 %offset
@@ -23,29 +26,30 @@ define amdgpu_kernel void @no_alias_addr_space_select(ptr addrspace(1) %gptr, pt
   ret void
 }
 
-define amdgpu_kernel void @no_alias_addr_space_arg(ptr %gptr, i32 %val, i1 %cond) #0 {
+define amdgpu_kernel void @no_alias_addr_space_arg(ptr %ptr, i32 %val, i1 %cond) #0 {
 ; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_arg(
-; CHECK-SAME: ptr [[GPTR:%.*]], i32 [[VAL:%.*]], i1 [[COND:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    store i32 [[VAL]], ptr [[GPTR]], align 4, !noalias.addrspace [[META1:![0-9]+]]
+; CHECK-SAME: ptr [[PTR:%.*]], i32 [[VAL:%.*]], i1 [[COND:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    store i32 [[VAL]], ptr [[PTR]], align 4, !noalias.addrspace [[META1:![0-9]+]]
 ; CHECK-NEXT:    ret void
 ;
-  %a = addrspacecast ptr %gptr to ptr addrspace(5)
-  %b = addrspacecast ptr %gptr to ptr addrspace(7)
-  %ptr_a = addrspacecast ptr addrspace(5) %a to ptr
-  %ptr_b = addrspacecast ptr addrspace(7) %b to ptr
-  %ptr = select i1 %cond, ptr %ptr_a, ptr %ptr_b
-  store i32 %val, ptr %ptr
+  %cast.ptr.a = addrspacecast ptr %ptr to ptr addrspace(5)
+  %cast.ptr.b = addrspacecast ptr %ptr to ptr addrspace(7)
+  %ptr.a = addrspacecast ptr addrspace(5) %cast.ptr.a to ptr
+  %ptr.b = addrspacecast ptr addrspace(7) %cast.ptr.b to ptr
+  %sel.ptr = select i1 %cond, ptr %ptr.a, ptr %ptr.b
+  store i32 %val, ptr %sel.ptr
   ret void
 }
 
-define amdgpu_kernel void @no_alias_addr_space_branch(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, ptr addrspace(5) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+define amdgpu_kernel void @no_alias_addr_space_branch(ptr addrspace(5) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
 ; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_branch(
-; CHECK-SAME: ptr addrspace(1) [[GPTR:%.*]], ptr addrspace(3) [[LPTR:%.*]], ptr addrspace(5) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(5) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    br i1 [[COND1]], label %[[BB_1_TRUE:.*]], label %[[BB_1_FALSE:.*]]
 ; CHECK:       [[BB_1_TRUE]]:
-; CHECK-NEXT:    [[A:%.*]] = addrspacecast ptr addrspace(1) [[GPTR]] to ptr
+; CHECK-NEXT:    [[A:%.*]] = addrspacecast ptr addrspace(1) @gptr to ptr
 ; CHECK-NEXT:    br label %[[BB_1_END:.*]]
 ; CHECK:       [[BB_1_FALSE]]:
+; CHECK-NEXT:    [[LPTR:%.*]] = alloca i32, align 4, addrspace(3)
 ; CHECK-NEXT:    [[B:%.*]] = addrspacecast ptr addrspace(3) [[LPTR]] to ptr
 ; CHECK-NEXT:    br label %[[BB_1_END]]
 ; CHECK:       [[BB_1_END]]:
@@ -61,10 +65,11 @@ define amdgpu_kernel void @no_alias_addr_space_branch(ptr addrspace(1) %gptr, pt
 ;
   br i1 %cond1, label %bb.1.true, label %bb.1.false
 bb.1.true:
-  %a = addrspacecast ptr addrspace(1) %gptr to ptr
+  %a = addrspacecast ptr addrspace(1) @gptr to ptr
   br label %bb.1.end
 
 bb.1.false:
+  %lptr = alloca i32, align 4, addrspace(3)
   %b = addrspacecast ptr addrspace(3) %lptr to ptr
   br label %bb.1.end
 
@@ -81,6 +86,212 @@ bb.2.end:
   store i32 %val, ptr %ptr2
   ret void
 }
+
+define amdgpu_kernel void @no_alias_addr_space_select_cmpxchg(ptr addrspace(5) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_select_cmpxchg(
+; CHECK-SAME: ptr addrspace(5) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[LPTR:%.*]] = alloca i32, align 4, addrspace(3)
+; CHECK-NEXT:    [[B:%.*]] = addrspacecast ptr addrspace(3) [[LPTR]] to ptr
+; CHECK-NEXT:    [[C:%.*]] = addrspacecast ptr addrspace(5) [[SPTR]] to ptr
+; CHECK-NEXT:    [[ADD_A:%.*]] = getelementptr inbounds i8, ptr addrspacecast (ptr addrspace(1) @gptr to ptr), i32 [[OFFSET]]
+; CHECK-NEXT:    [[PTR:%.*]] = select i1 [[COND1]], ptr [[ADD_A]], ptr [[B]]
+; CHECK-NEXT:    [[PTR2:%.*]] = select i1 [[COND2]], ptr [[PTR]], ptr [[C]]
+; CHECK-NEXT:    [[CMPXCHG_0:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 4 monotonic monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[CMPXCHG_1:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 5 acq_rel monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[CMPXCHG_2:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 6 acquire monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[CMPXCHG_3:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 7 release monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[CMPXCHG_4:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 8 seq_cst monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[CMPXCHG_5:%.*]] = cmpxchg weak ptr [[PTR2]], i32 0, i32 9 seq_cst monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[CMPXCHG_6:%.*]] = cmpxchg volatile ptr [[PTR2]], i32 0, i32 10 seq_cst monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[CMPXCHG_7:%.*]] = cmpxchg weak volatile ptr [[PTR2]], i32 0, i32 11 syncscope("singlethread") seq_cst monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    ret void
+;
+  %lptr = alloca i32, align 4, addrspace(3)
+  %a = addrspacecast ptr addrspace(1) @gptr to ptr
+  %b = addrspacecast ptr addrspace(3) %lptr to ptr
+  %c = addrspacecast ptr addrspace(5) %sptr to ptr
+  %add_a = getelementptr inbounds i8, ptr %a, i32 %offset
+  %ptr = select i1 %cond1, ptr %add_a, ptr %b
+  %ptr2 = select i1 %cond2, ptr %ptr, ptr %c
+  %cmpxchg.0 = cmpxchg ptr %ptr2, i32 0, i32 4 monotonic monotonic, align 4
+  %cmpxchg.1 = cmpxchg ptr %ptr2, i32 0, i32 5 acq_rel monotonic, align 4
+  %cmpxchg.2 = cmpxchg ptr %ptr2, i32 0, i32 6 acquire monotonic, align 4
+  %cmpxchg.3 = cmpxchg ptr %ptr2, i32 0, i32 7 release monotonic, align 4
+  %cmpxchg.4 = cmpxchg ptr %ptr2, i32 0, i32 8 seq_cst monotonic, align 4
+  %cmpxchg.5 = cmpxchg weak ptr %ptr2, i32 0, i32 9 seq_cst monotonic, align 4
+  %cmpxchg.6 = cmpxchg volatile ptr %ptr2, i32 0, i32 10 seq_cst monotonic, align 4
+  %cmpxchg.7 = cmpxchg weak volatile ptr %ptr2, i32 0, i32 11 syncscope("singlethread") seq_cst monotonic, align 4
+  ret void
+}
+
+define amdgpu_kernel void @no_alias_addr_space_branch_cmpxchg(ptr addrspace(5) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_branch_cmpxchg(
+; CHECK-SAME: ptr addrspace(5) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    br i1 [[COND1]], label %[[BB_1_TRUE:.*]], label %[[BB_1_FALSE:.*]]
+; CHECK:       [[BB_1_TRUE]]:
+; CHECK-NEXT:    [[A:%.*]] = addrspacecast ptr addrspace(1) @gptr to ptr
+; CHECK-NEXT:    br label %[[BB_1_END:.*]]
+; CHECK:       [[BB_1_FALSE]]:
+; CHECK-NEXT:    [[LPTR:%.*]] = alloca i32, align 4, addrspace(3)
+; CHECK-NEXT:    [[B:%.*]] = addrspacecast ptr addrspace(3) [[LPTR]] to ptr
+; CHECK-NEXT:    br label %[[BB_1_END]]
+; CHECK:       [[BB_1_END]]:
+; CHECK-NEXT:    [[PTR1:%.*]] = phi ptr [ [[A]], %[[BB_1_TRUE]] ], [ [[B]], %[[BB_1_FALSE]] ]
+; CHECK-NEXT:    br i1 [[COND2]], label %[[BB_2_TRUE:.*]], label %[[BB_2_END:.*]]
+; CHECK:       [[BB_2_TRUE]]:
+; CHECK-NEXT:    [[C:%.*]] = addrspacecast ptr addrspace(5) [[SPTR]] to ptr
+; CHECK-NEXT:    br label %[[BB_2_END]]
+; CHECK:       [[BB_2_END]]:
+; CHECK-NEXT:    [[PTR2:%.*]] = phi ptr [ [[PTR1]], %[[BB_1_END]] ], [ [[C]], %[[BB_2_TRUE]] ]
+; CHECK-NEXT:    [[CMPXCHG_0:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 4 monotonic monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[CMPXCHG_1:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 5 acq_rel monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[CMPXCHG_2:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 6 acquire monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[CMPXCHG_3:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 7 release monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[CMPXCHG_4:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 8 seq_cst monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[CMPXCHG_5:%.*]] = cmpxchg weak ptr [[PTR2]], i32 0, i32 9 seq_cst monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[CMPXCHG_6:%.*]] = cmpxchg volatile ptr [[PTR2]], i32 0, i32 10 seq_cst monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[CMPXCHG_7:%.*]] = cmpxchg weak volatile ptr [[PTR2]], i32 0, i32 11 syncscope("singlethread") seq_cst monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    ret void
+;
+  br i1 %cond1, label %bb.1.true, label %bb.1.false
+bb.1.true:
+  %a = addrspacecast ptr addrspace(1) @gptr to ptr
+  br label %bb.1.end
+
+bb.1.false:
+  %lptr = alloca i32, align 4, addrspace(3)
+  %b = addrspacecast ptr addrspace(3) %lptr to ptr
+  br label %bb.1.end
+
+bb.1.end:
+  %ptr1 = phi ptr [ %a, %bb.1.true ], [ %b, %bb.1.false ]
+  br i1 %cond2, label %bb.2.true, label %bb.2.end
+
+bb.2.true:
+  %c = addrspacecast ptr addrspace(5) %sptr to ptr
+  br label %bb.2.end
+
+bb.2.end:
+  %ptr2 = phi ptr [ %ptr1, %bb.1.end ], [ %c, %bb.2.true ]
+  %cmpxchg.0 = cmpxchg ptr %ptr2, i32 0, i32 4 monotonic monotonic, align 4
+  %cmpxchg.1 = cmpxchg ptr %ptr2, i32 0, i32 5 acq_rel monotonic, align 4
+  %cmpxchg.2 = cmpxchg ptr %ptr2, i32 0, i32 6 acquire monotonic, align 4
+  %cmpxchg.3 = cmpxchg ptr %ptr2, i32 0, i32 7 release monotonic, align 4
+  %cmpxchg.4 = cmpxchg ptr %ptr2, i32 0, i32 8 seq_cst monotonic, align 4
+  %cmpxchg.5 = cmpxchg weak ptr %ptr2, i32 0, i32 9 seq_cst monotonic, align 4
+  %cmpxchg.6 = cmpxchg volatile ptr %ptr2, i32 0, i32 10 seq_cst monotonic, align 4
+  %cmpxchg.7 = cmpxchg weak volatile ptr %ptr2, i32 0, i32 11 syncscope("singlethread") seq_cst monotonic, align 4
+  ret void
+}
+
+define amdgpu_kernel void @no_alias_addr_space_select_atomicrmw(ptr addrspace(5) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_select_atomicrmw(
+; CHECK-SAME: ptr addrspace(5) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[LPTR:%.*]] = alloca i32, align 4, addrspace(3)
+; CHECK-NEXT:    [[B:%.*]] = addrspacecast ptr addrspace(3) [[LPTR]] to ptr
+; CHECK-NEXT:    [[C:%.*]] = addrspacecast ptr addrspace(5) [[SPTR]] to ptr
+; CHECK-NEXT:    [[ADD_A:%.*]] = getelementptr inbounds i8, ptr addrspacecast (ptr addrspace(1) @gptr to ptr), i32 [[OFFSET]]
+; CHECK-NEXT:    [[PTR:%.*]] = select i1 [[COND1]], ptr [[ADD_A]], ptr [[B]]
+; CHECK-NEXT:    [[PTR2:%.*]] = select i1 [[COND2]], ptr [[PTR]], ptr [[C]]
+; CHECK-NEXT:    [[ATOMICRMW_XCHG:%.*]] = atomicrmw xchg ptr [[PTR2]], i32 12 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_ADD:%.*]] = atomicrmw add ptr [[PTR2]], i32 13 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_SUB:%.*]] = atomicrmw sub ptr [[PTR2]], i32 14 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_AND:%.*]] = atomicrmw and ptr [[PTR2]], i32 15 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_NAND:%.*]] = atomicrmw nand ptr [[PTR2]], i32 16 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_OR:%.*]] = atomicrmw or ptr [[PTR2]], i32 17 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_XOR:%.*]] = atomicrmw xor ptr [[PTR2]], i32 18 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_MAX:%.*]] = atomicrmw max ptr [[PTR2]], i32 19 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_MIN:%.*]] = atomicrmw volatile min ptr [[PTR2]], i32 20 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_UMAX:%.*]] = atomicrmw umax ptr [[PTR2]], i32 21 syncscope("singlethread") monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_UMIN:%.*]] = atomicrmw volatile umin ptr [[PTR2]], i32 22 syncscope("singlethread") monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    ret void
+;
+  %lptr = alloca i32, align 4, addrspace(3)
+  %a = addrspacecast ptr addrspace(1) @gptr to ptr
+  %b = addrspacecast ptr addrspace(3) %lptr to ptr
+  %c = addrspacecast ptr addrspace(5) %sptr to ptr
+  %add_a = getelementptr inbounds i8, ptr %a, i32 %offset
+  %ptr = select i1 %cond1, ptr %add_a, ptr %b
+  %ptr2 = select i1 %cond2, ptr %ptr, ptr %c
+  %atomicrmw.xchg = atomicrmw xchg ptr %ptr2, i32 12 monotonic, align 4
+  %atomicrmw.add = atomicrmw add ptr %ptr2, i32 13 monotonic, align 4
+  %atomicrmw.sub = atomicrmw sub ptr %ptr2, i32 14 monotonic, align 4
+  %atomicrmw.and = atomicrmw and ptr %ptr2, i32 15 monotonic, align 4
+  %atomicrmw.nand = atomicrmw nand ptr %ptr2, i32 16 monotonic, align 4
+  %atomicrmw.or = atomicrmw or ptr %ptr2, i32 17 monotonic, align 4
+  %atomicrmw.xor = atomicrmw xor ptr %ptr2, i32 18 monotonic, align 4
+  %atomicrmw.max = atomicrmw max ptr %ptr2, i32 19 monotonic, align 4
+  %atomicrmw.min = atomicrmw volatile min ptr %ptr2, i32 20 monotonic, align 4
+  %atomicrmw.umax = atomicrmw umax ptr %ptr2, i32 21 syncscope("singlethread") monotonic, align 4
+  %atomicrmw.umin = atomicrmw volatile umin ptr %ptr2, i32 22 syncscope("singlethread") monotonic, align 4
+  ret void
+}
+
+define amdgpu_kernel void @no_alias_addr_space_branch_atomicrmw(ptr addrspace(5) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_branch_atomicrmw(
+; CHECK-SAME: ptr addrspace(5) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    br i1 [[COND1]], label %[[BB_1_TRUE:.*]], label %[[BB_1_FALSE:.*]]
+; CHECK:       [[BB_1_TRUE]]:
+; CHECK-NEXT:    [[A:%.*]] = addrspacecast ptr addrspace(1) @gptr to ptr
+; CHECK-NEXT:    br label %[[BB_1_END:.*]]
+; CHECK:       [[BB_1_FALSE]]:
+; CHECK-NEXT:    [[LPTR:%.*]] = alloca i32, align 4, addrspace(3)
+; CHECK-NEXT:    [[B:%.*]] = addrspacecast ptr addrspace(3) [[LPTR]] to ptr
+; CHECK-NEXT:    br label %[[BB_1_END]]
+; CHECK:       [[BB_1_END]]:
+; CHECK-NEXT:    [[PTR1:%.*]] = phi ptr [ [[A]], %[[BB_1_TRUE]] ], [ [[B]], %[[BB_1_FALSE]] ]
+; CHECK-NEXT:    br i1 [[COND2]], label %[[BB_2_TRUE:.*]], label %[[BB_2_END:.*]]
+; CHECK:       [[BB_2_TRUE]]:
+; CHECK-NEXT:    [[C:%.*]] = addrspacecast ptr addrspace(5) [[SPTR]] to ptr
+; CHECK-NEXT:    br label %[[BB_2_END]]
+; CHECK:       [[BB_2_END]]:
+; CHECK-NEXT:    [[PTR2:%.*]] = phi ptr [ [[PTR1]], %[[BB_1_END]] ], [ [[C]], %[[BB_2_TRUE]] ]
+; CHECK-NEXT:    [[ATOMICRMW_XCHG:%.*]] = atomicrmw xchg ptr [[PTR2]], i32 12 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_ADD:%.*]] = atomicrmw add ptr [[PTR2]], i32 13 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_SUB:%.*]] = atomicrmw sub ptr [[PTR2]], i32 14 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_AND:%.*]] = atomicrmw and ptr [[PTR2]], i32 15 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_NAND:%.*]] = atomicrmw nand ptr [[PTR2]], i32 16 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_OR:%.*]] = atomicrmw or ptr [[PTR2]], i32 17 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_XOR:%.*]] = atomicrmw xor ptr [[PTR2]], i32 18 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_MAX:%.*]] = atomicrmw max ptr [[PTR2]], i32 19 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_MIN:%.*]] = atomicrmw volatile min ptr [[PTR2]], i32 20 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_UMAX:%.*]] = atomicrmw umax ptr [[PTR2]], i32 21 syncscope("singlethread") monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_UMIN:%.*]] = atomicrmw volatile umin ptr [[PTR2]], i32 22 syncscope("singlethread") monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    ret void
+;
+  br i1 %cond1, label %bb.1.true, label %bb.1.false
+bb.1.true:
+  %a = addrspacecast ptr addrspace(1) @gptr to ptr
+  br label %bb.1.end
+
+bb.1.false:
+  %lptr = alloca i32, align 4, addrspace(3)
+  %b = addrspacecast ptr addrspace(3) %lptr to ptr
+  br label %bb.1.end
+
+bb.1.end:
+  %ptr1 = phi ptr [ %a, %bb.1.true ], [ %b, %bb.1.false ]
+  br i1 %cond2, label %bb.2.true, label %bb.2.end
+
+bb.2.true:
+  %c = addrspacecast ptr addrspace(5) %sptr to ptr
+  br label %bb.2.end
+
+bb.2.end:
+  %ptr2 = phi ptr [ %ptr1, %bb.1.end ], [ %c, %bb.2.true ]
+  %atomicrmw.xchg = atomicrmw xchg ptr %ptr2, i32 12 monotonic, align 4
+  %atomicrmw.add = atomicrmw add ptr %ptr2, i32 13 monotonic, align 4
+  %atomicrmw.sub = atomicrmw sub ptr %ptr2, i32 14 monotonic, align 4
+  %atomicrmw.and = atomicrmw and ptr %ptr2, i32 15 monotonic, align 4
+  %atomicrmw.nand = atomicrmw nand ptr %ptr2, i32 16 monotonic, align 4
+  %atomicrmw.or = atomicrmw or ptr %ptr2, i32 17 monotonic, align 4
+  %atomicrmw.xor = atomicrmw xor ptr %ptr2, i32 18 monotonic, align 4
+  %atomicrmw.max = atomicrmw max ptr %ptr2, i32 19 monotonic, align 4
+  %atomicrmw.min = atomicrmw volatile min ptr %ptr2, i32 20 monotonic, align 4
+  %atomicrmw.umax = atomicrmw umax ptr %ptr2, i32 21 syncscope("singlethread") monotonic, align 4
+  %atomicrmw.umin = atomicrmw volatile umin ptr %ptr2, i32 22 syncscope("singlethread") monotonic, align 4
+  ret void
+}
 ;.
 ; CHECK: [[META0]] = !{i32 2, i32 3, i32 4, i32 5, i32 6, i32 10}
 ; CHECK: [[META1]] = !{i32 1, i32 5, i32 6, i32 7, i32 8, i32 10}

From 03194c7808e067a7f76619af06a3f1bea4c7c977 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Fri, 25 Apr 2025 13:01:56 +0800
Subject: [PATCH 3/6] fix comments, use range and boolean state

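The state now tracks half-open [Lo, Hi) ranges of address spaces that may
still alias, instead of a bit mask. A self-contained sketch of the splitting
behaviour (a hypothetical standalone helper for illustration; the member
function below works on a SmallVector of pairs):

  #include <utility>
  #include <vector>

  using ASRange = std::pair<unsigned, unsigned>; // half-open [Lo, Hi)

  // Remove one address space from whichever range contains it, splitting
  // the range in two when it falls strictly inside.
  static void removeASExample(std::vector<ASRange> &Ranges, unsigned AS) {
    for (auto It = Ranges.begin(); It != Ranges.end(); ++It) {
      if (It->first <= AS && AS < It->second) {
        ASRange Old = *It;
        Ranges.erase(It);
        if (Old.first < AS)
          Ranges.push_back({Old.first, AS}); // keep [Lo, AS)
        if (AS + 1 < Old.second)
          Ranges.push_back({AS + 1, Old.second}); // keep [AS+1, Hi)
        return;
      }
    }
  }
  // For example, {{0, 10}} becomes {{0, 5}, {6, 10}} after
  // removeASExample(Ranges, 5).
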
---
 llvm/include/llvm/Transforms/IPO/Attributor.h | 22 +++--
 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp   |  9 +-
 .../Transforms/IPO/AttributorAttributes.cpp   | 87 ++++++++++++++-----
 3 files changed, 85 insertions(+), 33 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 67bd1925aadf3..de531925ff861 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -6338,8 +6338,8 @@ struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> {
 
 /// An abstract interface for no-alias address space information.
 struct AANoAliasAddrSpace
-    : public StateWrapper<BitIntegerState<uint32_t>, AbstractAttribute> {
-  using Base = StateWrapper<BitIntegerState<uint32_t>, AbstractAttribute>;
+    : public StateWrapper<BooleanState, AbstractAttribute> {
+  using Base = StateWrapper<BooleanState, AbstractAttribute>;
   AANoAliasAddrSpace(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
 
   /// See AbstractAttribute::isValidIRPositionForInit
@@ -6367,13 +6367,25 @@ struct AANoAliasAddrSpace
     return (AA->getIdAddr() == &ID);
   }
 
-  void setMask(uint32_t Mask) {
-    removeKnownBits(~Mask);
-    removeAssumedBits(~Mask);
+  void setMaxAddrSpace(unsigned MaxAS) {
+    MaxAddrSpace = MaxAS;
+    for (auto it = ASRanges.begin(); it != ASRanges.end();) {
+      if (it->first > MaxAS) {
+        it = ASRanges.erase(it);
+      } else if (it->second > MaxAS + 1) {
+        it->second = MaxAS + 1;
+      } else {
+        it++;
+      }
+    }
   }
 
   /// Unique ID (due to the unique address)
   static const char ID;
+
+protected:
+  SmallVector<std::pair<unsigned, unsigned>> ASRanges;
+  unsigned MaxAddrSpace = ~0;
 };
 
 struct AAAllocationInfo : public StateWrapper<BooleanState, AbstractAttribute> {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 5bd104bfc99cb..59c5682024716 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1372,7 +1372,6 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
   LLVM_DEBUG(dbgs() << "[AMDGPUAttributor] Module " << M.getName() << " is "
                     << (AC.IsClosedWorldModule ? "" : "not ")
                     << "assumed to be a closed world.\n");
-  uint32_t AddrSpaceMask = (1 << (AMDGPUAS::MAX_AMDGPU_ADDRESS + 1)) - 1;
   for (auto *F : Functions) {
     A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(*F));
     A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(*F));
@@ -1392,25 +1391,25 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
         A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(Ptr));
         const_cast<AANoAliasAddrSpace *>(
             A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(Ptr)))
-            ->setMask(AddrSpaceMask);
+            ->setMaxAddrSpace(AMDGPUAS::MAX_AMDGPU_ADDRESS);
       } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
         Value &Ptr = *(SI->getPointerOperand());
         A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(Ptr));
         const_cast<AANoAliasAddrSpace *>(
             A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(Ptr)))
-            ->setMask(AddrSpaceMask);
+            ->setMaxAddrSpace(AMDGPUAS::MAX_AMDGPU_ADDRESS);
       } else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I)) {
         Value &Ptr = *(RMW->getPointerOperand());
         A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(Ptr));
         const_cast<AANoAliasAddrSpace *>(
             A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(Ptr)))
-            ->setMask(AddrSpaceMask);
+            ->setMaxAddrSpace(AMDGPUAS::MAX_AMDGPU_ADDRESS);
       } else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I)) {
         Value &Ptr = *(CmpX->getPointerOperand());
         A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(Ptr));
         const_cast<AANoAliasAddrSpace *>(
             A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(Ptr)))
-            ->setMask(AddrSpaceMask);
+            ->setMaxAddrSpace(AMDGPUAS::MAX_AMDGPU_ADDRESS);
       }
     }
   }
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 542f858e6a54d..a0957852ffffb 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12796,14 +12796,18 @@ struct AANoAliasAddrSpaceImpl : public AANoAliasAddrSpace {
            "Associated value is not a pointer");
 
     if (!A.getInfoCache().getFlatAddressSpace().has_value()) {
+      resetASRanges();
       indicatePessimisticFixpoint();
       return;
     }
 
     unsigned FlatAS = A.getInfoCache().getFlatAddressSpace().value();
+    resetASRanges();
+    removeAS(FlatAS);
+
     unsigned AS = getAssociatedType()->getPointerAddressSpace();
     if (AS != FlatAS) {
-      removeAssumedBits(1 << AS);
+      removeAS(AS);
       indicateOptimisticFixpoint();
     }
   }
@@ -12813,7 +12817,7 @@ struct AANoAliasAddrSpaceImpl : public AANoAliasAddrSpace {
     uint32_t OrigAssumed = getAssumed();
 
     auto CheckAddressSpace = [&](Value &Obj) {
-      if (isa<UndefValue>(&Obj) || isa<PoisonValue>(&Obj))
+      if (isa<PoisonValue>(&Obj))
         return true;
       // Handle an argument in the flat address space that only has
       // addrspacecast uses.
       if (auto *Arg = dyn_cast<Argument>(&Obj)) {
@@ -12824,18 +12828,26 @@ struct AANoAliasAddrSpaceImpl : public AANoAliasAddrSpace {
               return false;
             if (ASCI->getDestAddressSpace() == FlatAS)
               return false;
-            removeAssumedBits(1 << ASCI->getDestAddressSpace());
+            removeAS(ASCI->getDestAddressSpace());
           }
+          return true;
         }
       }
-      removeAssumedBits(1 << Obj.getType()->getPointerAddressSpace());
+
+      unsigned AS = Obj.getType()->getPointerAddressSpace();
+      if (AS == FlatAS)
+        return false;
+
+      removeAS(Obj.getType()->getPointerAddressSpace());
       return true;
     };
 
     auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(getIRPosition(), this,
                                                         DepClassTy::REQUIRED);
-    if (!AUO->forallUnderlyingObjects(CheckAddressSpace))
+    if (!AUO->forallUnderlyingObjects(CheckAddressSpace)) {
+      resetASRanges();
       return indicatePessimisticFixpoint();
+    }
 
     return OrigAssumed == getAssumed() ? ChangeStatus::UNCHANGED
                                        : ChangeStatus::CHANGED;
@@ -12854,15 +12866,14 @@ struct AANoAliasAddrSpaceImpl : public AANoAliasAddrSpace {
     LLVMContext &Ctx = getAssociatedValue().getContext();
     llvm::MDNode *NoAliasASNode = nullptr;
     MDBuilder MDB(Ctx);
-    for (unsigned int i = 1; i < 32; i++) {
-      if (i != FlatAS && isAssumed(1 << i)) {
-        if (NoAliasASNode == nullptr) {
-          NoAliasASNode = MDB.createRange(APInt(32, i), APInt(32, i + 1));
-        } else {
-          llvm::MDNode *ASRange =
-              MDB.createRange(APInt(32, i), APInt(32, i + 1));
-          NoAliasASNode = MDNode::getMostGenericRange(NoAliasASNode, ASRange);
-        }
+    for (auto range : ASRanges) {
+      if (NoAliasASNode == nullptr) {
+        NoAliasASNode =
+            MDB.createRange(APInt(32, range.first), APInt(32, range.second));
+      } else {
+        llvm::MDNode *ASRange =
+            MDB.createRange(APInt(32, range.first), APInt(32, range.second));
+        NoAliasASNode = MDNode::getMostGenericRange(NoAliasASNode, ASRange);
       }
     }
 
@@ -12878,17 +12889,17 @@ struct AANoAliasAddrSpaceImpl : public AANoAliasAddrSpace {
       auto *Inst = dyn_cast<Instruction>(U.getUser());
       if (!Inst)
         return true;
+      if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst) &&
+          !isa<AtomicCmpXchgInst>(Inst) && !isa<AtomicRMWInst>(Inst))
+        return true;
       if (!A.isRunOn(Inst->getFunction()))
         return true;
-      if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst) ||
-          isa<AtomicCmpXchgInst>(Inst) || isa<AtomicRMWInst>(Inst)) {
-        Inst->setMetadata(LLVMContext::MD_noalias_addrspace, NoAliasASNode);
-        Changed = true;
-      }
+      Inst->setMetadata(LLVMContext::MD_noalias_addrspace, NoAliasASNode);
+      Changed = true;
       return true;
     };
     (void)A.checkForAllUses(Pred, *this, getAssociatedValue(),
-                            /* CheckBBLivenessOnly */ true);
+                            /* CheckBBLivenessOnly=*/true);
     return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
   }
 
@@ -12899,12 +12910,42 @@ struct AANoAliasAddrSpaceImpl : public AANoAliasAddrSpace {
     std::string Str;
     raw_string_ostream OS(Str);
     OS << "noaliasaddrspace(";
-    for (unsigned int i = 1; i < 32; i++)
-      if (isAssumed(1 << i))
-        OS << ' ' << i;
+    for (auto range : ASRanges)
+      OS << ' ' << "[" << range.first << "," << range.second << ")";
     OS << " )";
     return OS.str();
   }
+
+private:
+  void removeAS(unsigned AS) {
+    for (auto it = ASRanges.begin(); it != ASRanges.end();) {
+      if (it->first == AS) {
+        uint32_t Upper = it->second;
+        ASRanges.erase(it);
+        ASRanges.push_back(std::pair(AS + 1, Upper));
+        return;
+      } else if (it->second - 1 == AS) {
+        uint32_t Lower = it->first;
+        ASRanges.erase(it);
+        ASRanges.push_back(std::pair(Lower, AS));
+        return;
+      } else if (it->first < AS && AS < it->second - 1) {
+        uint32_t Upper = it->second;
+        uint32_t Lower = it->first;
+        ASRanges.erase(it);
+        ASRanges.push_back(std::pair(Lower, AS));
+        ASRanges.push_back(std::pair(AS + 1, Upper));
+        return;
+      } else {
+        it++;
+      }
+    }
+  }
+
+  void resetASRanges() {
+    ASRanges.clear();
+    ASRanges.push_back(std::pair(0, MaxAddrSpace));
+  }
 };
 
 struct AANoAliasAddrSpaceFloating final : AANoAliasAddrSpaceImpl {

From d5750d1f1f43e2f6805713aafdc8926cb23fd61d Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Sun, 27 Apr 2025 16:03:13 +0800
Subject: [PATCH 4/6] use alloca only in addrspace(5)

On AMDGPU, allocas live in the private address space (5), so the tests now
allocate in addrspace(5) and take the local addrspace(3) pointer as a kernel
argument instead.

---
 .../AMDGPU/attributor-noalias-addrspace.ll    | 96 +++++++++----------
 1 file changed, 48 insertions(+), 48 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll b/llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll
index 759004ffcc1fd..24469733e14a6 100644
--- a/llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll
+++ b/llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll
@@ -3,22 +3,22 @@
 
 @gptr = protected addrspace(1) externally_initialized global i32 0, align 4
 
-define amdgpu_kernel void @no_alias_addr_space_select(ptr addrspace(5) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+define amdgpu_kernel void @no_alias_addr_space_select(ptr addrspace(3) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
 ; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_select(
-; CHECK-SAME: ptr addrspace(5) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:    [[LPTR:%.*]] = alloca i32, align 4, addrspace(3)
-; CHECK-NEXT:    [[B:%.*]] = addrspacecast ptr addrspace(3) [[LPTR]] to ptr
-; CHECK-NEXT:    [[C:%.*]] = addrspacecast ptr addrspace(5) [[SPTR]] to ptr
+; CHECK-SAME: ptr addrspace(3) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[LPTR:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT:    [[B:%.*]] = addrspacecast ptr addrspace(5) [[LPTR]] to ptr
+; CHECK-NEXT:    [[C:%.*]] = addrspacecast ptr addrspace(3) [[SPTR]] to ptr
 ; CHECK-NEXT:    [[ADD_A:%.*]] = getelementptr inbounds i8, ptr addrspacecast (ptr addrspace(1) @gptr to ptr), i32 [[OFFSET]]
 ; CHECK-NEXT:    [[PTR:%.*]] = select i1 [[COND1]], ptr [[ADD_A]], ptr [[B]]
 ; CHECK-NEXT:    [[PTR2:%.*]] = select i1 [[COND2]], ptr [[PTR]], ptr [[C]]
 ; CHECK-NEXT:    store i32 [[VAL]], ptr [[PTR2]], align 4, !noalias.addrspace [[META0:![0-9]+]]
 ; CHECK-NEXT:    ret void
 ;
-  %lptr = alloca i32, align 4, addrspace(3)
+  %lptr = alloca i32, align 4, addrspace(5)
   %a = addrspacecast ptr addrspace(1) @gptr to ptr
-  %b = addrspacecast ptr addrspace(3) %lptr to ptr
-  %c = addrspacecast ptr addrspace(5) %sptr to ptr
+  %b = addrspacecast ptr addrspace(5) %lptr to ptr
+  %c = addrspacecast ptr addrspace(3) %sptr to ptr
   %add_a = getelementptr inbounds i8, ptr %a, i32 %offset
   %ptr = select i1 %cond1, ptr %add_a, ptr %b
   %ptr2 = select i1 %cond2, ptr %ptr, ptr %c
@@ -41,22 +41,22 @@ define amdgpu_kernel void @no_alias_addr_space_arg(ptr %ptr, i32 %val, i1 %cond)
   ret void
 }
 
-define amdgpu_kernel void @no_alias_addr_space_branch(ptr addrspace(5) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+define amdgpu_kernel void @no_alias_addr_space_branch(ptr addrspace(3) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
 ; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_branch(
-; CHECK-SAME: ptr addrspace(5) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(3) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    br i1 [[COND1]], label %[[BB_1_TRUE:.*]], label %[[BB_1_FALSE:.*]]
 ; CHECK:       [[BB_1_TRUE]]:
 ; CHECK-NEXT:    [[A:%.*]] = addrspacecast ptr addrspace(1) @gptr to ptr
 ; CHECK-NEXT:    br label %[[BB_1_END:.*]]
 ; CHECK:       [[BB_1_FALSE]]:
-; CHECK-NEXT:    [[LPTR:%.*]] = alloca i32, align 4, addrspace(3)
-; CHECK-NEXT:    [[B:%.*]] = addrspacecast ptr addrspace(3) [[LPTR]] to ptr
+; CHECK-NEXT:    [[LPTR:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT:    [[B:%.*]] = addrspacecast ptr addrspace(5) [[LPTR]] to ptr
 ; CHECK-NEXT:    br label %[[BB_1_END]]
 ; CHECK:       [[BB_1_END]]:
 ; CHECK-NEXT:    [[PTR1:%.*]] = phi ptr [ [[A]], %[[BB_1_TRUE]] ], [ [[B]], %[[BB_1_FALSE]] ]
 ; CHECK-NEXT:    br i1 [[COND2]], label %[[BB_2_TRUE:.*]], label %[[BB_2_END:.*]]
 ; CHECK:       [[BB_2_TRUE]]:
-; CHECK-NEXT:    [[C:%.*]] = addrspacecast ptr addrspace(5) [[SPTR]] to ptr
+; CHECK-NEXT:    [[C:%.*]] = addrspacecast ptr addrspace(3) [[SPTR]] to ptr
 ; CHECK-NEXT:    br label %[[BB_2_END]]
 ; CHECK:       [[BB_2_END]]:
 ; CHECK-NEXT:    [[PTR2:%.*]] = phi ptr [ [[PTR1]], %[[BB_1_END]] ], [ [[C]], %[[BB_2_TRUE]] ]
@@ -69,8 +69,8 @@ bb.1.true:
   br label %bb.1.end
 
 bb.1.false:
-  %lptr = alloca i32, align 4, addrspace(3)
-  %b = addrspacecast ptr addrspace(3) %lptr to ptr
+  %lptr = alloca i32, align 4, addrspace(5)
+  %b = addrspacecast ptr addrspace(5) %lptr to ptr
   br label %bb.1.end
 
 bb.1.end:
@@ -78,7 +78,7 @@ bb.1.end:
   br i1 %cond2, label %bb.2.true, label %bb.2.end
 
 bb.2.true:
-  %c = addrspacecast ptr addrspace(5) %sptr to ptr
+  %c = addrspacecast ptr addrspace(3) %sptr to ptr
   br label %bb.2.end
 
 bb.2.end:
@@ -87,12 +87,12 @@ bb.2.end:
   ret void
 }
 
-define amdgpu_kernel void @no_alias_addr_space_select_cmpxchg(ptr addrspace(5) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+define amdgpu_kernel void @no_alias_addr_space_select_cmpxchg(ptr addrspace(3) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
 ; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_select_cmpxchg(
-; CHECK-SAME: ptr addrspace(5) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[LPTR:%.*]] = alloca i32, align 4, addrspace(3)
-; CHECK-NEXT:    [[B:%.*]] = addrspacecast ptr addrspace(3) [[LPTR]] to ptr
-; CHECK-NEXT:    [[C:%.*]] = addrspacecast ptr addrspace(5) [[SPTR]] to ptr
+; CHECK-SAME: ptr addrspace(3) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[LPTR:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT:    [[B:%.*]] = addrspacecast ptr addrspace(5) [[LPTR]] to ptr
+; CHECK-NEXT:    [[C:%.*]] = addrspacecast ptr addrspace(3) [[SPTR]] to ptr
 ; CHECK-NEXT:    [[ADD_A:%.*]] = getelementptr inbounds i8, ptr addrspacecast (ptr addrspace(1) @gptr to ptr), i32 [[OFFSET]]
 ; CHECK-NEXT:    [[PTR:%.*]] = select i1 [[COND1]], ptr [[ADD_A]], ptr [[B]]
 ; CHECK-NEXT:    [[PTR2:%.*]] = select i1 [[COND2]], ptr [[PTR]], ptr [[C]]
@@ -106,10 +106,10 @@ define amdgpu_kernel void @no_alias_addr_space_select_cmpxchg(ptr addrspace(5) %
 ; CHECK-NEXT:    [[CMPXCHG_7:%.*]] = cmpxchg weak volatile ptr [[PTR2]], i32 0, i32 11 syncscope("singlethread") seq_cst monotonic, align 4, !noalias.addrspace [[META0]]
 ; CHECK-NEXT:    ret void
 ;
-  %lptr = alloca i32, align 4, addrspace(3)
+  %lptr = alloca i32, align 4, addrspace(5)
   %a = addrspacecast ptr addrspace(1) @gptr to ptr
-  %b = addrspacecast ptr addrspace(3) %lptr to ptr
-  %c = addrspacecast ptr addrspace(5) %sptr to ptr
+  %b = addrspacecast ptr addrspace(5) %lptr to ptr
+  %c = addrspacecast ptr addrspace(3) %sptr to ptr
   %add_a = getelementptr inbounds i8, ptr %a, i32 %offset
   %ptr = select i1 %cond1, ptr %add_a, ptr %b
   %ptr2 = select i1 %cond2, ptr %ptr, ptr %c
@@ -124,22 +124,22 @@ define amdgpu_kernel void @no_alias_addr_space_select_cmpxchg(ptr addrspace(5) %
   ret void
 }
 
-define amdgpu_kernel void @no_alias_addr_space_branch_cmpxchg(ptr addrspace(5) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+define amdgpu_kernel void @no_alias_addr_space_branch_cmpxchg(ptr addrspace(3) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
 ; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_branch_cmpxchg(
-; CHECK-SAME: ptr addrspace(5) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(3) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    br i1 [[COND1]], label %[[BB_1_TRUE:.*]], label %[[BB_1_FALSE:.*]]
 ; CHECK:       [[BB_1_TRUE]]:
 ; CHECK-NEXT:    [[A:%.*]] = addrspacecast ptr addrspace(1) @gptr to ptr
 ; CHECK-NEXT:    br label %[[BB_1_END:.*]]
 ; CHECK:       [[BB_1_FALSE]]:
-; CHECK-NEXT:    [[LPTR:%.*]] = alloca i32, align 4, addrspace(3)
-; CHECK-NEXT:    [[B:%.*]] = addrspacecast ptr addrspace(3) [[LPTR]] to ptr
+; CHECK-NEXT:    [[LPTR:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT:    [[B:%.*]] = addrspacecast ptr addrspace(5) [[LPTR]] to ptr
 ; CHECK-NEXT:    br label %[[BB_1_END]]
 ; CHECK:       [[BB_1_END]]:
 ; CHECK-NEXT:    [[PTR1:%.*]] = phi ptr [ [[A]], %[[BB_1_TRUE]] ], [ [[B]], %[[BB_1_FALSE]] ]
 ; CHECK-NEXT:    br i1 [[COND2]], label %[[BB_2_TRUE:.*]], label %[[BB_2_END:.*]]
 ; CHECK:       [[BB_2_TRUE]]:
-; CHECK-NEXT:    [[C:%.*]] = addrspacecast ptr addrspace(5) [[SPTR]] to ptr
+; CHECK-NEXT:    [[C:%.*]] = addrspacecast ptr addrspace(3) [[SPTR]] to ptr
 ; CHECK-NEXT:    br label %[[BB_2_END]]
 ; CHECK:       [[BB_2_END]]:
 ; CHECK-NEXT:    [[PTR2:%.*]] = phi ptr [ [[PTR1]], %[[BB_1_END]] ], [ [[C]], %[[BB_2_TRUE]] ]
@@ -159,8 +159,8 @@ bb.1.true:
   br label %bb.1.end
 
 bb.1.false:
-  %lptr = alloca i32, align 4, addrspace(3)
-  %b = addrspacecast ptr addrspace(3) %lptr to ptr
+  %lptr = alloca i32, align 4, addrspace(5)
+  %b = addrspacecast ptr addrspace(5) %lptr to ptr
   br label %bb.1.end
 
 bb.1.end:
@@ -168,7 +168,7 @@ bb.1.end:
   br i1 %cond2, label %bb.2.true, label %bb.2.end
 
 bb.2.true:
-  %c = addrspacecast ptr addrspace(5) %sptr to ptr
+  %c = addrspacecast ptr addrspace(3) %sptr to ptr
   br label %bb.2.end
 
 bb.2.end:
@@ -184,12 +184,12 @@ bb.2.end:
   ret void
 }
 
-define amdgpu_kernel void @no_alias_addr_space_select_atomicrmw(ptr addrspace(5) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+define amdgpu_kernel void @no_alias_addr_space_select_atomicrmw(ptr addrspace(3) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
 ; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_select_atomicrmw(
-; CHECK-SAME: ptr addrspace(5) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[LPTR:%.*]] = alloca i32, align 4, addrspace(3)
-; CHECK-NEXT:    [[B:%.*]] = addrspacecast ptr addrspace(3) [[LPTR]] to ptr
-; CHECK-NEXT:    [[C:%.*]] = addrspacecast ptr addrspace(5) [[SPTR]] to ptr
+; CHECK-SAME: ptr addrspace(3) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[LPTR:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT:    [[B:%.*]] = addrspacecast ptr addrspace(5) [[LPTR]] to ptr
+; CHECK-NEXT:    [[C:%.*]] = addrspacecast ptr addrspace(3) [[SPTR]] to ptr
 ; CHECK-NEXT:    [[ADD_A:%.*]] = getelementptr inbounds i8, ptr addrspacecast (ptr addrspace(1) @gptr to ptr), i32 [[OFFSET]]
 ; CHECK-NEXT:    [[PTR:%.*]] = select i1 [[COND1]], ptr [[ADD_A]], ptr [[B]]
 ; CHECK-NEXT:    [[PTR2:%.*]] = select i1 [[COND2]], ptr [[PTR]], ptr [[C]]
@@ -206,10 +206,10 @@ define amdgpu_kernel void @no_alias_addr_space_select_atomicrmw(ptr addrspace(5)
 ; CHECK-NEXT:    [[ATOMICRMW_UMIN:%.*]] = atomicrmw volatile umin ptr [[PTR2]], i32 22 syncscope("singlethread") monotonic, align 4, !noalias.addrspace [[META0]]
 ; CHECK-NEXT:    ret void
 ;
-  %lptr = alloca i32, align 4, addrspace(3)
+  %lptr = alloca i32, align 4, addrspace(5)
   %a = addrspacecast ptr addrspace(1) @gptr to ptr
-  %b = addrspacecast ptr addrspace(3) %lptr to ptr
-  %c = addrspacecast ptr addrspace(5) %sptr to ptr
+  %b = addrspacecast ptr addrspace(5) %lptr to ptr
+  %c = addrspacecast ptr addrspace(3) %sptr to ptr
   %add_a = getelementptr inbounds i8, ptr %a, i32 %offset
   %ptr = select i1 %cond1, ptr %add_a, ptr %b
   %ptr2 = select i1 %cond2, ptr %ptr, ptr %c
@@ -227,22 +227,22 @@ define amdgpu_kernel void @no_alias_addr_space_select_atomicrmw(ptr addrspace(5)
   ret void
 }
 
-define amdgpu_kernel void @no_alias_addr_space_branch_atomicrmw(ptr addrspace(5) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+define amdgpu_kernel void @no_alias_addr_space_branch_atomicrmw(ptr addrspace(3) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
 ; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_branch_atomicrmw(
-; CHECK-SAME: ptr addrspace(5) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: ptr addrspace(3) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    br i1 [[COND1]], label %[[BB_1_TRUE:.*]], label %[[BB_1_FALSE:.*]]
 ; CHECK:       [[BB_1_TRUE]]:
 ; CHECK-NEXT:    [[A:%.*]] = addrspacecast ptr addrspace(1) @gptr to ptr
 ; CHECK-NEXT:    br label %[[BB_1_END:.*]]
 ; CHECK:       [[BB_1_FALSE]]:
-; CHECK-NEXT:    [[LPTR:%.*]] = alloca i32, align 4, addrspace(3)
-; CHECK-NEXT:    [[B:%.*]] = addrspacecast ptr addrspace(3) [[LPTR]] to ptr
+; CHECK-NEXT:    [[LPTR:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT:    [[B:%.*]] = addrspacecast ptr addrspace(5) [[LPTR]] to ptr
 ; CHECK-NEXT:    br label %[[BB_1_END]]
 ; CHECK:       [[BB_1_END]]:
 ; CHECK-NEXT:    [[PTR1:%.*]] = phi ptr [ [[A]], %[[BB_1_TRUE]] ], [ [[B]], %[[BB_1_FALSE]] ]
 ; CHECK-NEXT:    br i1 [[COND2]], label %[[BB_2_TRUE:.*]], label %[[BB_2_END:.*]]
 ; CHECK:       [[BB_2_TRUE]]:
-; CHECK-NEXT:    [[C:%.*]] = addrspacecast ptr addrspace(5) [[SPTR]] to ptr
+; CHECK-NEXT:    [[C:%.*]] = addrspacecast ptr addrspace(3) [[SPTR]] to ptr
 ; CHECK-NEXT:    br label %[[BB_2_END]]
 ; CHECK:       [[BB_2_END]]:
 ; CHECK-NEXT:    [[PTR2:%.*]] = phi ptr [ [[PTR1]], %[[BB_1_END]] ], [ [[C]], %[[BB_2_TRUE]] ]
@@ -265,8 +265,8 @@ bb.1.true:
   br label %bb.1.end
 
 bb.1.false:
-  %lptr = alloca i32, align 4, addrspace(3)
-  %b = addrspacecast ptr addrspace(3) %lptr to ptr
+  %lptr = alloca i32, align 4, addrspace(5)
+  %b = addrspacecast ptr addrspace(5) %lptr to ptr
   br label %bb.1.end
 
 bb.1.end:
@@ -274,7 +274,7 @@ bb.1.end:
   br i1 %cond2, label %bb.2.true, label %bb.2.end
 
 bb.2.true:
-  %c = addrspacecast ptr addrspace(5) %sptr to ptr
+  %c = addrspacecast ptr addrspace(3) %sptr to ptr
   br label %bb.2.end
 
 bb.2.end:

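Note on the test change above: on AMDGPU, addrspace(5) is the private (scratch/stack) address space and addrspace(3) is local (LDS) memory, and the amdgcn data layout declares "A5", so allocas belong in addrspace(5); the swap moves the alloca into the private address space and the kernel argument into LDS. A minimal sketch (not part of the patch) of how a pass can derive the correct alloca address space from the data layout instead of hard-coding it:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Create an i32 stack slot in the data layout's alloca address space
// (5 on amdgcn targets, per the "A5" component of the data layout string).
static AllocaInst *createPrivateSlot(Module &M, BasicBlock &Entry) {
  const DataLayout &DL = M.getDataLayout();
  IRBuilder<> B(&Entry);
  return B.CreateAlloca(B.getInt32Ty(), DL.getAllocaAddrSpace(),
                        /*ArraySize=*/nullptr, "lptr");
}
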
>From b924d8df107452f1aa48d3a8a76320b68be03566 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Sat, 3 May 2025 14:41:09 +0800
Subject: [PATCH 5/6] fix comments

---
 llvm/include/llvm/Transforms/IPO/Attributor.h |  16 +-
 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp   |  33 ++-
 .../Transforms/IPO/AttributorAttributes.cpp   |  12 +-
 .../AMDGPU/attributor-noalias-addrspace.ll    | 199 ++++++++++++++++++
 4 files changed, 220 insertions(+), 40 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index de531925ff861..8dd864b438c1d 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -1349,6 +1349,8 @@ struct InformationCache {
   /// Return the flat address space if the associated target has one.
   std::optional<unsigned> getFlatAddressSpace() const;
 
+  virtual unsigned getMaxAddrSpace() const { return ~(0); }
+
 private:
   struct FunctionInfo {
     ~FunctionInfo();
@@ -6367,25 +6369,11 @@ struct AANoAliasAddrSpace
     return (AA->getIdAddr() == &ID);
   }
 
-  void setMaxAddrSpace(unsigned MaxAS) {
-    MaxAddrSpace = MaxAS;
-    for (auto it = ASRanges.begin(); it != ASRanges.end();) {
-      if (it->first > MaxAS) {
-        it = ASRanges.erase(it);
-      } else if (it->second > MaxAS + 1) {
-        it->second = MaxAS + 1;
-      } else {
-        it++;
-      }
-    }
-  }
-
   /// Unique ID (due to the unique address)
   static const char ID;
 
 protected:
   SmallVector<std::pair<unsigned, unsigned>> ASRanges;
-  unsigned MaxAddrSpace = ~0;
 };
 
 struct AAAllocationInfo : public StateWrapper<BooleanState, AbstractAttribute> {
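The hunk above drops the per-attribute setMaxAddrSpace plumbing in favor of a virtual query on the information cache, which AMDGPU overrides (see the AMDGPUAttributor.cpp hunk below). A simplified sketch of the pattern, with illustrative bodies:

// Base cache: conservative default, any 32-bit address space may exist.
struct InformationCacheSketch {
  virtual ~InformationCacheSketch() = default;
  virtual unsigned getMaxAddrSpace() const { return ~0U; }
};

// Target cache: AMDGPU defines a small fixed set of address spaces. The
// in-tree override returns AMDGPUAS::MAX_AMDGPU_ADDRESS; the value 9 here
// is inferred from the tests' [.., 10) upper bound in the range metadata.
struct AMDGPUInformationCacheSketch final : InformationCacheSketch {
  unsigned getMaxAddrSpace() const override { return 9; }
};
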
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index d1818ab44d6a0..a95f90d9d414e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -239,6 +239,10 @@ class AMDGPUInformationCache : public InformationCache {
     return ST.getMaxWavesPerEU();
   }
 
+  unsigned getMaxAddrSpace() const override {
+    return AMDGPUAS::MAX_AMDGPU_ADDRESS;
+  }
+
 private:
   /// Check if the ConstantExpr \p CE uses an addrspacecast from private or
   /// local to flat. These casts may require the queue pointer.
@@ -1401,30 +1405,19 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
     }
 
     for (auto &I : instructions(F)) {
+      Value *Ptr = nullptr;
       if (auto *LI = dyn_cast<LoadInst>(&I)) {
-        Value &Ptr = *(LI->getPointerOperand());
-        A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(Ptr));
-        const_cast<AANoAliasAddrSpace *>(
-            A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(Ptr)))
-            ->setMaxAddrSpace(AMDGPUAS::MAX_AMDGPU_ADDRESS);
+        Ptr = LI->getPointerOperand();
       } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
-        Value &Ptr = *(SI->getPointerOperand());
-        A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(Ptr));
-        const_cast<AANoAliasAddrSpace *>(
-            A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(Ptr)))
-            ->setMaxAddrSpace(AMDGPUAS::MAX_AMDGPU_ADDRESS);
+        Ptr = SI->getPointerOperand();
       } else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I)) {
-        Value &Ptr = *(RMW->getPointerOperand());
-        A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(Ptr));
-        const_cast<AANoAliasAddrSpace *>(
-            A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(Ptr)))
-            ->setMaxAddrSpace(AMDGPUAS::MAX_AMDGPU_ADDRESS);
+        Ptr = RMW->getPointerOperand();
       } else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I)) {
-        Value &Ptr = *(CmpX->getPointerOperand());
-        A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(Ptr));
-        const_cast<AANoAliasAddrSpace *>(
-            A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(Ptr)))
-            ->setMaxAddrSpace(AMDGPUAS::MAX_AMDGPU_ADDRESS);
+        Ptr = CmpX->getPointerOperand();
+      }
+      if (Ptr) {
+        A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
+        A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(*Ptr));
       }
     }
   }
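The loop now extracts the pointer operand once and seeds both AAs for it. Note that llvm::getLoadStorePointerOperand (llvm/IR/Instructions.h) covers only loads and stores, so the two atomic instruction kinds still need explicit cases; a standalone sketch equivalent to the code above:

#include "llvm/IR/Instructions.h"

using namespace llvm;

// Return the address operand of one of the four memory instruction kinds
// the patch seeds attributes for, or nullptr for any other instruction.
static Value *getMemPointerOperand(Instruction &I) {
  if (Value *Ptr = getLoadStorePointerOperand(&I))
    return Ptr; // load or store
  if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
    return RMW->getPointerOperand();
  if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
    return CmpX->getPointerOperand();
  return nullptr;
}
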
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index d0dad38cd3999..71f0416babb9f 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12792,13 +12792,13 @@ struct AANoAliasAddrSpaceImpl : public AANoAliasAddrSpace {
            "Associated value is not a pointer");
 
     if (!A.getInfoCache().getFlatAddressSpace().has_value()) {
-      resetASRanges();
+      resetASRanges(A);
       indicatePessimisticFixpoint();
       return;
     }
 
     unsigned FlatAS = A.getInfoCache().getFlatAddressSpace().value();
-    resetASRanges();
+    resetASRanges(A);
     removeAS(FlatAS);
 
     unsigned AS = getAssociatedType()->getPointerAddressSpace();
@@ -12841,7 +12841,7 @@ struct AANoAliasAddrSpaceImpl : public AANoAliasAddrSpace {
     auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(getIRPosition(), this,
                                                         DepClassTy::REQUIRED);
     if (!AUO->forallUnderlyingObjects(CheckAddressSpace)) {
-      resetASRanges();
+      resetASRanges(A);
       return indicatePessimisticFixpoint();
     }
 
@@ -12907,7 +12907,7 @@ struct AANoAliasAddrSpaceImpl : public AANoAliasAddrSpace {
     raw_string_ostream OS(Str);
     OS << "noaliasaddrspace(";
     for (auto range : ASRanges)
-      OS << ' ' << "[" << range.first << "," << range.second << ")";
+      OS << ' ' << '[' << range.first << ',' << range.second << ')';
     OS << " )";
     return OS.str();
   }
@@ -12938,9 +12938,9 @@ struct AANoAliasAddrSpaceImpl : public AANoAliasAddrSpace {
     }
   }
 
-  void resetASRanges() {
+  void resetASRanges(Attributor &A) {
     ASRanges.clear();
-    ASRanges.push_back(std::pair(0, MaxAddrSpace));
+    ASRanges.push_back(std::pair(0, A.getInfoCache().getMaxAddrSpace() + 1));
   }
 };
 
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll b/llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll
index 24469733e14a6..5a1fb40759882 100644
--- a/llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll
+++ b/llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll
@@ -292,6 +292,205 @@ bb.2.end:
   %atomicrmw.umin = atomicrmw volatile umin ptr %ptr2, i32 22 syncscope("singlethread") monotonic, align 4
   ret void
 }
+
+define amdgpu_kernel void @no_alias_addr_space_select_cmpxchg_flat(ptr %c, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_select_cmpxchg_flat(
+; CHECK-SAME: ptr [[C:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[LPTR:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT:    [[B:%.*]] = addrspacecast ptr addrspace(5) [[LPTR]] to ptr
+; CHECK-NEXT:    [[ADD_A:%.*]] = getelementptr inbounds i8, ptr addrspacecast (ptr addrspace(1) @gptr to ptr), i32 [[OFFSET]]
+; CHECK-NEXT:    [[PTR:%.*]] = select i1 [[COND1]], ptr [[ADD_A]], ptr [[B]]
+; CHECK-NEXT:    [[PTR2:%.*]] = select i1 [[COND2]], ptr [[PTR]], ptr [[C]]
+; CHECK-NEXT:    [[CMPXCHG_0:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 4 monotonic monotonic, align 4
+; CHECK-NEXT:    [[CMPXCHG_1:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 5 acq_rel monotonic, align 4
+; CHECK-NEXT:    [[CMPXCHG_2:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 6 acquire monotonic, align 4
+; CHECK-NEXT:    [[CMPXCHG_3:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 7 release monotonic, align 4
+; CHECK-NEXT:    [[CMPXCHG_4:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 8 seq_cst monotonic, align 4
+; CHECK-NEXT:    [[CMPXCHG_5:%.*]] = cmpxchg weak ptr [[PTR2]], i32 0, i32 9 seq_cst monotonic, align 4
+; CHECK-NEXT:    [[CMPXCHG_6:%.*]] = cmpxchg volatile ptr [[PTR2]], i32 0, i32 10 seq_cst monotonic, align 4
+; CHECK-NEXT:    [[CMPXCHG_7:%.*]] = cmpxchg weak volatile ptr [[PTR2]], i32 0, i32 11 syncscope("singlethread") seq_cst monotonic, align 4
+; CHECK-NEXT:    ret void
+;
+  %lptr = alloca i32, align 4, addrspace(5)
+  %a = addrspacecast ptr addrspace(1) @gptr to ptr
+  %b = addrspacecast ptr addrspace(5) %lptr to ptr
+  %add_a = getelementptr inbounds i8, ptr %a, i32 %offset
+  %ptr = select i1 %cond1, ptr %add_a, ptr %b
+  %ptr2 = select i1 %cond2, ptr %ptr, ptr %c
+  %cmpxchg.0 = cmpxchg ptr %ptr2, i32 0, i32 4 monotonic monotonic, align 4
+  %cmpxchg.1 = cmpxchg ptr %ptr2, i32 0, i32 5 acq_rel monotonic, align 4
+  %cmpxchg.2 = cmpxchg ptr %ptr2, i32 0, i32 6 acquire monotonic, align 4
+  %cmpxchg.3 = cmpxchg ptr %ptr2, i32 0, i32 7 release monotonic, align 4
+  %cmpxchg.4 = cmpxchg ptr %ptr2, i32 0, i32 8 seq_cst monotonic, align 4
+  %cmpxchg.5 = cmpxchg weak ptr %ptr2, i32 0, i32 9 seq_cst monotonic, align 4
+  %cmpxchg.6 = cmpxchg volatile ptr %ptr2, i32 0, i32 10 seq_cst monotonic, align 4
+  %cmpxchg.7 = cmpxchg weak volatile ptr %ptr2, i32 0, i32 11 syncscope("singlethread") seq_cst monotonic, align 4
+  ret void
+}
+
+define amdgpu_kernel void @no_alias_addr_space_branch_cmpxchg_flat(ptr %c, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_branch_cmpxchg_flat(
+; CHECK-SAME: ptr [[C:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    br i1 [[COND1]], label %[[BB_1_TRUE:.*]], label %[[BB_1_FALSE:.*]]
+; CHECK:       [[BB_1_TRUE]]:
+; CHECK-NEXT:    [[A:%.*]] = addrspacecast ptr addrspace(1) @gptr to ptr
+; CHECK-NEXT:    br label %[[BB_1_END:.*]]
+; CHECK:       [[BB_1_FALSE]]:
+; CHECK-NEXT:    [[LPTR:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT:    [[B:%.*]] = addrspacecast ptr addrspace(5) [[LPTR]] to ptr
+; CHECK-NEXT:    br label %[[BB_1_END]]
+; CHECK:       [[BB_1_END]]:
+; CHECK-NEXT:    [[PTR1:%.*]] = phi ptr [ [[A]], %[[BB_1_TRUE]] ], [ [[B]], %[[BB_1_FALSE]] ]
+; CHECK-NEXT:    br i1 [[COND2]], label %[[BB_2_TRUE:.*]], label %[[BB_2_END:.*]]
+; CHECK:       [[BB_2_TRUE]]:
+; CHECK-NEXT:    br label %[[BB_2_END]]
+; CHECK:       [[BB_2_END]]:
+; CHECK-NEXT:    [[PTR2:%.*]] = phi ptr [ [[PTR1]], %[[BB_1_END]] ], [ [[C]], %[[BB_2_TRUE]] ]
+; CHECK-NEXT:    [[CMPXCHG_0:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 4 monotonic monotonic, align 4
+; CHECK-NEXT:    [[CMPXCHG_1:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 5 acq_rel monotonic, align 4
+; CHECK-NEXT:    [[CMPXCHG_2:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 6 acquire monotonic, align 4
+; CHECK-NEXT:    [[CMPXCHG_3:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 7 release monotonic, align 4
+; CHECK-NEXT:    [[CMPXCHG_4:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 8 seq_cst monotonic, align 4
+; CHECK-NEXT:    [[CMPXCHG_5:%.*]] = cmpxchg weak ptr [[PTR2]], i32 0, i32 9 seq_cst monotonic, align 4
+; CHECK-NEXT:    [[CMPXCHG_6:%.*]] = cmpxchg volatile ptr [[PTR2]], i32 0, i32 10 seq_cst monotonic, align 4
+; CHECK-NEXT:    [[CMPXCHG_7:%.*]] = cmpxchg weak volatile ptr [[PTR2]], i32 0, i32 11 syncscope("singlethread") seq_cst monotonic, align 4
+; CHECK-NEXT:    ret void
+;
+  br i1 %cond1, label %bb.1.true, label %bb.1.false
+bb.1.true:
+  %a = addrspacecast ptr addrspace(1) @gptr to ptr
+  br label %bb.1.end
+
+bb.1.false:
+  %lptr = alloca i32, align 4, addrspace(5)
+  %b = addrspacecast ptr addrspace(5) %lptr to ptr
+  br label %bb.1.end
+
+bb.1.end:
+  %ptr1 = phi ptr [ %a, %bb.1.true ], [ %b, %bb.1.false ]
+  br i1 %cond2, label %bb.2.true, label %bb.2.end
+
+bb.2.true:
+  br label %bb.2.end
+
+bb.2.end:
+  %ptr2 = phi ptr [ %ptr1, %bb.1.end ], [ %c, %bb.2.true ]
+  %cmpxchg.0 = cmpxchg ptr %ptr2, i32 0, i32 4 monotonic monotonic, align 4
+  %cmpxchg.1 = cmpxchg ptr %ptr2, i32 0, i32 5 acq_rel monotonic, align 4
+  %cmpxchg.2 = cmpxchg ptr %ptr2, i32 0, i32 6 acquire monotonic, align 4
+  %cmpxchg.3 = cmpxchg ptr %ptr2, i32 0, i32 7 release monotonic, align 4
+  %cmpxchg.4 = cmpxchg ptr %ptr2, i32 0, i32 8 seq_cst monotonic, align 4
+  %cmpxchg.5 = cmpxchg weak ptr %ptr2, i32 0, i32 9 seq_cst monotonic, align 4
+  %cmpxchg.6 = cmpxchg volatile ptr %ptr2, i32 0, i32 10 seq_cst monotonic, align 4
+  %cmpxchg.7 = cmpxchg weak volatile ptr %ptr2, i32 0, i32 11 syncscope("singlethread") seq_cst monotonic, align 4
+  ret void
+}
+
+define amdgpu_kernel void @no_alias_addr_space_select_atomicrmw_flat(ptr %c, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_select_atomicrmw_flat(
+; CHECK-SAME: ptr [[C:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[LPTR:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT:    [[B:%.*]] = addrspacecast ptr addrspace(5) [[LPTR]] to ptr
+; CHECK-NEXT:    [[ADD_A:%.*]] = getelementptr inbounds i8, ptr addrspacecast (ptr addrspace(1) @gptr to ptr), i32 [[OFFSET]]
+; CHECK-NEXT:    [[PTR:%.*]] = select i1 [[COND1]], ptr [[ADD_A]], ptr [[B]]
+; CHECK-NEXT:    [[PTR2:%.*]] = select i1 [[COND2]], ptr [[PTR]], ptr [[C]]
+; CHECK-NEXT:    [[ATOMICRMW_XCHG:%.*]] = atomicrmw xchg ptr [[PTR2]], i32 12 monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_ADD:%.*]] = atomicrmw add ptr [[PTR2]], i32 13 monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_SUB:%.*]] = atomicrmw sub ptr [[PTR2]], i32 14 monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_AND:%.*]] = atomicrmw and ptr [[PTR2]], i32 15 monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_NAND:%.*]] = atomicrmw nand ptr [[PTR2]], i32 16 monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_OR:%.*]] = atomicrmw or ptr [[PTR2]], i32 17 monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_XOR:%.*]] = atomicrmw xor ptr [[PTR2]], i32 18 monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_MAX:%.*]] = atomicrmw max ptr [[PTR2]], i32 19 monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_MIN:%.*]] = atomicrmw volatile min ptr [[PTR2]], i32 20 monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_UMAX:%.*]] = atomicrmw umax ptr [[PTR2]], i32 21 syncscope("singlethread") monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_UMIN:%.*]] = atomicrmw volatile umin ptr [[PTR2]], i32 22 syncscope("singlethread") monotonic, align 4
+; CHECK-NEXT:    ret void
+;
+  %lptr = alloca i32, align 4, addrspace(5)
+  %a = addrspacecast ptr addrspace(1) @gptr to ptr
+  %b = addrspacecast ptr addrspace(5) %lptr to ptr
+  %add_a = getelementptr inbounds i8, ptr %a, i32 %offset
+  %ptr = select i1 %cond1, ptr %add_a, ptr %b
+  %ptr2 = select i1 %cond2, ptr %ptr, ptr %c
+  %atomicrmw.xchg = atomicrmw xchg ptr %ptr2, i32 12 monotonic, align 4
+  %atomicrmw.add = atomicrmw add ptr %ptr2, i32 13 monotonic, align 4
+  %atomicrmw.sub = atomicrmw sub ptr %ptr2, i32 14 monotonic, align 4
+  %atomicrmw.and = atomicrmw and ptr %ptr2, i32 15 monotonic, align 4
+  %atomicrmw.nand = atomicrmw nand ptr %ptr2, i32 16 monotonic, align 4
+  %atomicrmw.or = atomicrmw or ptr %ptr2, i32 17 monotonic, align 4
+  %atomicrmw.xor = atomicrmw xor ptr %ptr2, i32 18 monotonic, align 4
+  %atomicrmw.max = atomicrmw max ptr %ptr2, i32 19 monotonic, align 4
+  %atomicrmw.min = atomicrmw volatile min ptr %ptr2, i32 20 monotonic, align 4
+  %atomicrmw.umax = atomicrmw umax ptr %ptr2, i32 21 syncscope("singlethread") monotonic, align 4
+  %atomicrmw.umin = atomicrmw volatile umin ptr %ptr2, i32 22 syncscope("singlethread") monotonic, align 4
+  ret void
+}
+
+define amdgpu_kernel void @no_alias_addr_space_branch_atomicrmw_flat(ptr %c, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_branch_atomicrmw_flat(
+; CHECK-SAME: ptr [[C:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    br i1 [[COND1]], label %[[BB_1_TRUE:.*]], label %[[BB_1_FALSE:.*]]
+; CHECK:       [[BB_1_TRUE]]:
+; CHECK-NEXT:    [[A:%.*]] = addrspacecast ptr addrspace(1) @gptr to ptr
+; CHECK-NEXT:    br label %[[BB_1_END:.*]]
+; CHECK:       [[BB_1_FALSE]]:
+; CHECK-NEXT:    [[LPTR:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT:    [[B:%.*]] = addrspacecast ptr addrspace(5) [[LPTR]] to ptr
+; CHECK-NEXT:    br label %[[BB_1_END]]
+; CHECK:       [[BB_1_END]]:
+; CHECK-NEXT:    [[PTR1:%.*]] = phi ptr [ [[A]], %[[BB_1_TRUE]] ], [ [[B]], %[[BB_1_FALSE]] ]
+; CHECK-NEXT:    br i1 [[COND2]], label %[[BB_2_TRUE:.*]], label %[[BB_2_END:.*]]
+; CHECK:       [[BB_2_TRUE]]:
+; CHECK-NEXT:    br label %[[BB_2_END]]
+; CHECK:       [[BB_2_END]]:
+; CHECK-NEXT:    [[PTR2:%.*]] = phi ptr [ [[PTR1]], %[[BB_1_END]] ], [ [[C]], %[[BB_2_TRUE]] ]
+; CHECK-NEXT:    [[ATOMICRMW_XCHG:%.*]] = atomicrmw xchg ptr [[PTR2]], i32 12 monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_ADD:%.*]] = atomicrmw add ptr [[PTR2]], i32 13 monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_SUB:%.*]] = atomicrmw sub ptr [[PTR2]], i32 14 monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_AND:%.*]] = atomicrmw and ptr [[PTR2]], i32 15 monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_NAND:%.*]] = atomicrmw nand ptr [[PTR2]], i32 16 monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_OR:%.*]] = atomicrmw or ptr [[PTR2]], i32 17 monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_XOR:%.*]] = atomicrmw xor ptr [[PTR2]], i32 18 monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_MAX:%.*]] = atomicrmw max ptr [[PTR2]], i32 19 monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_MIN:%.*]] = atomicrmw volatile min ptr [[PTR2]], i32 20 monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_UMAX:%.*]] = atomicrmw umax ptr [[PTR2]], i32 21 syncscope("singlethread") monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_UMIN:%.*]] = atomicrmw volatile umin ptr [[PTR2]], i32 22 syncscope("singlethread") monotonic, align 4
+; CHECK-NEXT:    ret void
+;
+  br i1 %cond1, label %bb.1.true, label %bb.1.false
+bb.1.true:
+  %a = addrspacecast ptr addrspace(1) @gptr to ptr
+  br label %bb.1.end
+
+bb.1.false:
+  %lptr = alloca i32, align 4, addrspace(5)
+  %b = addrspacecast ptr addrspace(5) %lptr to ptr
+  br label %bb.1.end
+
+bb.1.end:
+  %ptr1 = phi ptr [ %a, %bb.1.true ], [ %b, %bb.1.false ]
+  br i1 %cond2, label %bb.2.true, label %bb.2.end
+
+bb.2.true:
+  br label %bb.2.end
+
+bb.2.end:
+  %ptr2 = phi ptr [ %ptr1, %bb.1.end ], [ %c, %bb.2.true ]
+  %atomicrmw.xchg = atomicrmw xchg ptr %ptr2, i32 12 monotonic, align 4
+  %atomicrmw.add = atomicrmw add ptr %ptr2, i32 13 monotonic, align 4
+  %atomicrmw.sub = atomicrmw sub ptr %ptr2, i32 14 monotonic, align 4
+  %atomicrmw.and = atomicrmw and ptr %ptr2, i32 15 monotonic, align 4
+  %atomicrmw.nand = atomicrmw nand ptr %ptr2, i32 16 monotonic, align 4
+  %atomicrmw.or = atomicrmw or ptr %ptr2, i32 17 monotonic, align 4
+  %atomicrmw.xor = atomicrmw xor ptr %ptr2, i32 18 monotonic, align 4
+  %atomicrmw.max = atomicrmw max ptr %ptr2, i32 19 monotonic, align 4
+  %atomicrmw.min = atomicrmw volatile min ptr %ptr2, i32 20 monotonic, align 4
+  %atomicrmw.umax = atomicrmw umax ptr %ptr2, i32 21 syncscope("singlethread") monotonic, align 4
+  %atomicrmw.umin = atomicrmw volatile umin ptr %ptr2, i32 22 syncscope("singlethread") monotonic, align 4
+  ret void
+}
+
 ;.
 ; CHECK: [[META0]] = !{i32 2, i32 3, i32 4, i32 5, i32 6, i32 10}
 ; CHECK: [[META1]] = !{i32 1, i32 5, i32 6, i32 7, i32 8, i32 10}

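For reference, !noalias.addrspace metadata is a list of i32 pairs forming half-open ranges of address spaces the access is known not to touch; [[META0]] = !{i32 2, i32 3, i32 4, i32 5, i32 6, i32 10} reads as [2,3) u [4,5) u [6,10), i.e. the access never lands in region (2), constant (4), or address spaces 6 through 9, leaving flat (0), global (1), local (3), and private (5) possible. A sketch of how such a node can be built, mirroring the manifest() hunk above (the concrete ranges are illustrative):

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"

using namespace llvm;

// Merge a list of half-open [Lo, Hi) ranges into one range metadata node,
// e.g. {{2,3},{4,5},{6,10}} produces the META0 node checked above.
static MDNode *buildNoAliasASMetadata(
    LLVMContext &Ctx, ArrayRef<std::pair<unsigned, unsigned>> Ranges) {
  MDBuilder MDB(Ctx);
  MDNode *Result = nullptr;
  for (auto [Lo, Hi] : Ranges) {
    MDNode *R = MDB.createRange(APInt(32, Lo), APInt(32, Hi));
    Result = Result ? MDNode::getMostGenericRange(Result, R) : R;
  }
  return Result;
}
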
>From b6e24324474e029a94f4c75a34d711b81b998e17 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Wed, 7 May 2025 13:41:33 +0800
Subject: [PATCH 6/6] fix comments

---
 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp   |  10 +-
 .../Transforms/IPO/AttributorAttributes.cpp   |  31 ++---
 .../AMDGPU/attributor-noalias-addrspace.ll    | 115 ++++++++++++++++++
 3 files changed, 137 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index a95f90d9d414e..7d13fb066be19 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1406,15 +1406,15 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
 
     for (auto &I : instructions(F)) {
       Value *Ptr = nullptr;
-      if (auto *LI = dyn_cast<LoadInst>(&I)) {
+      if (auto *LI = dyn_cast<LoadInst>(&I))
         Ptr = LI->getPointerOperand();
-      } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
+      else if (auto *SI = dyn_cast<StoreInst>(&I))
         Ptr = SI->getPointerOperand();
-      } else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I)) {
+      else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
         Ptr = RMW->getPointerOperand();
-      } else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I)) {
+      else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
         Ptr = CmpX->getPointerOperand();
-      }
+
       if (Ptr) {
         A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
         A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(*Ptr));
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 71f0416babb9f..8e1f18c1135ea 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12791,18 +12791,18 @@ struct AANoAliasAddrSpaceImpl : public AANoAliasAddrSpace {
     assert(getAssociatedType()->isPtrOrPtrVectorTy() &&
            "Associated value is not a pointer");
 
-    if (!A.getInfoCache().getFlatAddressSpace().has_value()) {
-      resetASRanges(A);
+    resetASRanges(A);
+
+    auto FlatAS = A.getInfoCache().getFlatAddressSpace();
+    if (!FlatAS.has_value()) {
       indicatePessimisticFixpoint();
       return;
     }
 
-    unsigned FlatAS = A.getInfoCache().getFlatAddressSpace().value();
-    resetASRanges(A);
-    removeAS(FlatAS);
+    removeAS(FlatAS.value());
 
     unsigned AS = getAssociatedType()->getPointerAddressSpace();
-    if (AS != FlatAS) {
+    if (AS != FlatAS.value()) {
       removeAS(AS);
       indicateOptimisticFixpoint();
     }
@@ -12817,7 +12817,8 @@ struct AANoAliasAddrSpaceImpl : public AANoAliasAddrSpace {
         return true;
       // Handle an argument in the flat address space that only has addrspacecast uses.
       if (auto *Arg = dyn_cast<Argument>(&Obj)) {
-        if (Arg->getType()->getPointerAddressSpace() == FlatAS) {
+        if (Arg->getType()->getPointerAddressSpace() == FlatAS &&
+            getAssociatedFunction()->hasKernelCallingConv()) {
           for (auto *U : Arg->users()) {
             auto *ASCI = dyn_cast<AddrSpaceCastInst>(U);
             if (!ASCI)
@@ -12851,23 +12852,23 @@ struct AANoAliasAddrSpaceImpl : public AANoAliasAddrSpace {
 
   /// See AbstractAttribute::manifest(...).
   ChangeStatus manifest(Attributor &A) override {
-    if (!A.getInfoCache().getFlatAddressSpace().has_value())
+    auto FlatAS = A.getInfoCache().getFlatAddressSpace();
+    if (!FlatAS.has_value())
       return ChangeStatus::UNCHANGED;
 
-    unsigned FlatAS = A.getInfoCache().getFlatAddressSpace().value();
     unsigned AS = getAssociatedType()->getPointerAddressSpace();
-    if (AS != FlatAS)
+    if (AS != FlatAS.value())
       return ChangeStatus::UNCHANGED;
 
     LLVMContext &Ctx = getAssociatedValue().getContext();
-    llvm::MDNode *NoAliasASNode = nullptr;
+    MDNode *NoAliasASNode = nullptr;
     MDBuilder MDB(Ctx);
     for (auto range : ASRanges) {
       if (NoAliasASNode == nullptr) {
         NoAliasASNode =
             MDB.createRange(APInt(32, range.first), APInt(32, range.second));
       } else {
-        llvm::MDNode *ASRange =
+        MDNode *ASRange =
             MDB.createRange(APInt(32, range.first), APInt(32, range.second));
         NoAliasASNode = MDNode::getMostGenericRange(NoAliasASNode, ASRange);
       }
@@ -12918,12 +12919,14 @@ struct AANoAliasAddrSpaceImpl : public AANoAliasAddrSpace {
       if (it->first == AS) {
         uint32_t Upper = it->second;
         ASRanges.erase(it);
-        ASRanges.push_back(std::pair(AS + 1, Upper));
+        if (AS + 1 < Upper)
+          ASRanges.push_back(std::pair(AS + 1, Upper));
         return;
       } else if (it->second - 1 == AS) {
         uint32_t Lower = it->first;
         ASRanges.erase(it);
-        ASRanges.push_back(std::pair(Lower, AS));
+        if (Lower < AS)
+          ASRanges.push_back(std::pair(Lower, AS));
         return;
       } else if (it->first < AS && AS < it->second - 1) {
         uint32_t Upper = it->second;
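The two new guards keep removeAS from pushing an empty range when the removed address space sits exactly on a range boundary. The three cases collapse into one in this standalone restatement (illustrative, not the in-tree code):

#include <utility>
#include <vector>

// Half-open address-space ranges [first, second).
using ASRange = std::pair<unsigned, unsigned>;

// Remove the single address space AS from the range set, never emitting an
// empty range: keep the parts below and above AS only if they are non-empty.
static void removeAS(std::vector<ASRange> &Ranges, unsigned AS) {
  for (auto It = Ranges.begin(); It != Ranges.end(); ++It) {
    if (AS < It->first || AS >= It->second)
      continue; // AS is outside this range.
    auto [Lo, Hi] = *It;
    Ranges.erase(It);
    if (Lo < AS)
      Ranges.push_back({Lo, AS});
    if (AS + 1 < Hi)
      Ranges.push_back({AS + 1, Hi});
    return;
  }
}
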
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll b/llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll
index 5a1fb40759882..5a8bf713ae013 100644
--- a/llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll
+++ b/llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll
@@ -2,6 +2,8 @@
 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck %s
 
 @gptr = protected addrspace(1) externally_initialized global i32 0, align 4
+@gptr2 = protected addrspace(5) externally_initialized global i32 0, align 4
+@gptr3 = protected addrspace(3) externally_initialized global i32 0, align 4
 
 define amdgpu_kernel void @no_alias_addr_space_select(ptr addrspace(3) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
 ; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_select(
@@ -491,6 +493,119 @@ bb.2.end:
   ret void
 }
 
+define internal void @callee_no_alias_addr_space_select(ptr %ptr1, ptr %ptr2, ptr %ptr3, i1 %cond1, i1 %cond2, i32 %val) #0 {
+; CHECK-LABEL: define internal void @callee_no_alias_addr_space_select(
+; CHECK-SAME: ptr [[PTR1:%.*]], ptr [[PTR2:%.*]], ptr [[PTR3:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:    [[PTR4:%.*]] = select i1 [[COND1]], ptr addrspacecast (ptr addrspace(1) @gptr to ptr), ptr addrspacecast (ptr addrspace(5) @gptr2 to ptr)
+; CHECK-NEXT:    [[PTR5:%.*]] = select i1 [[COND2]], ptr [[PTR4]], ptr addrspacecast (ptr addrspace(3) @gptr3 to ptr)
+; CHECK-NEXT:    store i32 [[VAL]], ptr [[PTR5]], align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_XCHG:%.*]] = atomicrmw xchg ptr [[PTR5]], i32 12 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_ADD:%.*]] = atomicrmw add ptr [[PTR5]], i32 13 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_SUB:%.*]] = atomicrmw sub ptr [[PTR5]], i32 14 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_AND:%.*]] = atomicrmw and ptr [[PTR5]], i32 15 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_NAND:%.*]] = atomicrmw nand ptr [[PTR5]], i32 16 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_OR:%.*]] = atomicrmw or ptr [[PTR5]], i32 17 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_XOR:%.*]] = atomicrmw xor ptr [[PTR5]], i32 18 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_MAX:%.*]] = atomicrmw max ptr [[PTR5]], i32 19 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_MIN:%.*]] = atomicrmw volatile min ptr [[PTR5]], i32 20 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_UMAX:%.*]] = atomicrmw umax ptr [[PTR5]], i32 21 syncscope("singlethread") monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_UMIN:%.*]] = atomicrmw volatile umin ptr [[PTR5]], i32 22 syncscope("singlethread") monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    ret void
+;
+  %ptr4 = select i1 %cond1, ptr %ptr1, ptr %ptr2
+  %ptr5 = select i1 %cond2, ptr %ptr4, ptr %ptr3
+  store i32 %val, ptr %ptr5
+  %atomicrmw.xchg = atomicrmw xchg ptr %ptr5, i32 12 monotonic, align 4
+  %atomicrmw.add = atomicrmw add ptr %ptr5, i32 13 monotonic, align 4
+  %atomicrmw.sub = atomicrmw sub ptr %ptr5, i32 14 monotonic, align 4
+  %atomicrmw.and = atomicrmw and ptr %ptr5, i32 15 monotonic, align 4
+  %atomicrmw.nand = atomicrmw nand ptr %ptr5, i32 16 monotonic, align 4
+  %atomicrmw.or = atomicrmw or ptr %ptr5, i32 17 monotonic, align 4
+  %atomicrmw.xor = atomicrmw xor ptr %ptr5, i32 18 monotonic, align 4
+  %atomicrmw.max = atomicrmw max ptr %ptr5, i32 19 monotonic, align 4
+  %atomicrmw.min = atomicrmw volatile min ptr %ptr5, i32 20 monotonic, align 4
+  %atomicrmw.umax = atomicrmw umax ptr %ptr5, i32 21 syncscope("singlethread") monotonic, align 4
+  %atomicrmw.umin = atomicrmw volatile umin ptr %ptr5, i32 22 syncscope("singlethread") monotonic, align 4
+  ret void
+}
+
+define internal void @callee_alias_addr_space_branch(ptr %ptr1, ptr %ptr2, ptr %ptr3, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+; CHECK-LABEL: define internal void @callee_alias_addr_space_branch(
+; CHECK-SAME: ptr [[PTR1:%.*]], ptr [[PTR2:%.*]], ptr [[PTR3:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    br i1 [[COND1]], label %[[BB_1_TRUE:.*]], label %[[BB_1_FALSE:.*]]
+; CHECK:       [[BB_1_TRUE]]:
+; CHECK-NEXT:    br label %[[BB_1_END:.*]]
+; CHECK:       [[BB_1_FALSE]]:
+; CHECK-NEXT:    br label %[[BB_1_END]]
+; CHECK:       [[BB_1_END]]:
+; CHECK-NEXT:    [[PTR4:%.*]] = phi ptr [ addrspacecast (ptr addrspace(1) @gptr to ptr), %[[BB_1_TRUE]] ], [ addrspacecast (ptr addrspace(5) @gptr2 to ptr), %[[BB_1_FALSE]] ]
+; CHECK-NEXT:    br i1 [[COND2]], label %[[BB_2_TRUE:.*]], label %[[BB_2_END:.*]]
+; CHECK:       [[BB_2_TRUE]]:
+; CHECK-NEXT:    br label %[[BB_2_END]]
+; CHECK:       [[BB_2_END]]:
+; CHECK-NEXT:    [[PTR5:%.*]] = phi ptr [ [[PTR4]], %[[BB_1_END]] ], [ addrspacecast (ptr addrspace(3) @gptr3 to ptr), %[[BB_2_TRUE]] ]
+; CHECK-NEXT:    store i32 [[VAL]], ptr [[PTR5]], align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_XCHG:%.*]] = atomicrmw xchg ptr [[PTR5]], i32 12 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_ADD:%.*]] = atomicrmw add ptr [[PTR5]], i32 13 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_SUB:%.*]] = atomicrmw sub ptr [[PTR5]], i32 14 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_AND:%.*]] = atomicrmw and ptr [[PTR5]], i32 15 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_NAND:%.*]] = atomicrmw nand ptr [[PTR5]], i32 16 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_OR:%.*]] = atomicrmw or ptr [[PTR5]], i32 17 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_XOR:%.*]] = atomicrmw xor ptr [[PTR5]], i32 18 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_MAX:%.*]] = atomicrmw max ptr [[PTR5]], i32 19 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_MIN:%.*]] = atomicrmw volatile min ptr [[PTR5]], i32 20 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_UMAX:%.*]] = atomicrmw umax ptr [[PTR5]], i32 21 syncscope("singlethread") monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    [[ATOMICRMW_UMIN:%.*]] = atomicrmw volatile umin ptr [[PTR5]], i32 22 syncscope("singlethread") monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT:    ret void
+;
+  br i1 %cond1, label %bb.1.true, label %bb.1.false
+bb.1.true:
+  br label %bb.1.end
+
+bb.1.false:
+  br label %bb.1.end
+
+bb.1.end:
+  %ptr4 = phi ptr [ %ptr1, %bb.1.true ], [ %ptr2, %bb.1.false ]
+  br i1 %cond2, label %bb.2.true, label %bb.2.end
+
+bb.2.true:
+  br label %bb.2.end
+
+bb.2.end:
+  %ptr5 = phi ptr [ %ptr4, %bb.1.end ], [ %ptr3, %bb.2.true ]
+  store i32 %val, ptr %ptr5
+  %atomicrmw.xchg = atomicrmw xchg ptr %ptr5, i32 12 monotonic, align 4
+  %atomicrmw.add = atomicrmw add ptr %ptr5, i32 13 monotonic, align 4
+  %atomicrmw.sub = atomicrmw sub ptr %ptr5, i32 14 monotonic, align 4
+  %atomicrmw.and = atomicrmw and ptr %ptr5, i32 15 monotonic, align 4
+  %atomicrmw.nand = atomicrmw nand ptr %ptr5, i32 16 monotonic, align 4
+  %atomicrmw.or = atomicrmw or ptr %ptr5, i32 17 monotonic, align 4
+  %atomicrmw.xor = atomicrmw xor ptr %ptr5, i32 18 monotonic, align 4
+  %atomicrmw.max = atomicrmw max ptr %ptr5, i32 19 monotonic, align 4
+  %atomicrmw.min = atomicrmw volatile min ptr %ptr5, i32 20 monotonic, align 4
+  %atomicrmw.umax = atomicrmw umax ptr %ptr5, i32 21 syncscope("singlethread") monotonic, align 4
+  %atomicrmw.umin = atomicrmw volatile umin ptr %ptr5, i32 22 syncscope("singlethread") monotonic, align 4
+  ret void
+}
+
+define amdgpu_kernel void @kernel_call_func(i1 %cond1, i1 %cond2, i32 %val) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_call_func(
+; CHECK-SAME: i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:    [[LPTR:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT:    call void @callee_no_alias_addr_space_select(ptr addrspacecast (ptr addrspace(1) @gptr to ptr), ptr addrspacecast (ptr addrspace(5) @gptr2 to ptr), ptr addrspacecast (ptr addrspace(3) @gptr3 to ptr), i1 [[COND1]], i1 [[COND2]], i32 [[VAL]])
+; CHECK-NEXT:    call void @callee_alias_addr_space_branch(ptr addrspacecast (ptr addrspace(1) @gptr to ptr), ptr addrspacecast (ptr addrspace(5) @gptr2 to ptr), ptr addrspacecast (ptr addrspace(3) @gptr3 to ptr), i1 [[COND1]], i1 [[COND2]], i32 [[VAL]])
+; CHECK-NEXT:    ret void
+;
+  %lptr = alloca i32, align 4, addrspace(5)
+  %a = addrspacecast ptr addrspace(1) @gptr to ptr
+  %b = addrspacecast ptr addrspace(5) @gptr2 to ptr
+  %c = addrspacecast ptr addrspace(3) @gptr3 to ptr
+  call void @callee_no_alias_addr_space_select(ptr %a, ptr %b, ptr %c, i1 %cond1, i1 %cond2, i32 %val)
+  call void @callee_alias_addr_space_branch(ptr %a, ptr %b, ptr %c, i1 %cond1, i1 %cond2, i32 %val)
+  ret void
+}
+
 ;.
 ; CHECK: [[META0]] = !{i32 2, i32 3, i32 4, i32 5, i32 6, i32 10}
 ; CHECK: [[META1]] = !{i32 1, i32 5, i32 6, i32 7, i32 8, i32 10}


