[llvm] [IPO] Added attributor for identifying invariant loads (PR #141800)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 5 12:59:52 PDT 2025


https://github.com/zGoldthorpe updated https://github.com/llvm/llvm-project/pull/141800

>From fc269c14e24b6a9731ce354fb1f1e682cb78d53e Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <zgoldtho at ualberta.ca>
Date: Wed, 28 May 2025 11:05:47 -0500
Subject: [PATCH 1/8] Added attributor for identifying `!invariant.load`s.

---
 llvm/include/llvm/Transforms/IPO/Attributor.h |  38 +++
 llvm/lib/Transforms/IPO/Attributor.cpp        |   2 +
 .../Transforms/IPO/AttributorAttributes.cpp   | 245 ++++++++++++++++++
 .../multiple-offsets-pointer-info.ll          |   8 +-
 .../Attributor/tag-invariant-loads.ll         | 220 ++++++++++++++++
 5 files changed, 509 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/Transforms/Attributor/tag-invariant-loads.ll

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index c628bbb007230..53fa7a04dc5b5 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -6289,6 +6289,44 @@ struct AAUnderlyingObjects : AbstractAttribute {
                           AA::ValueScope Scope = AA::Interprocedural) const = 0;
 };
 
+/// An abstract interface for identifying pointers from which loads can be
+/// marked invariant.
+struct AAInvariantLoadPointer : public AbstractAttribute {
+  AAInvariantLoadPointer(const IRPosition &IRP) : AbstractAttribute(IRP) {}
+
+  /// See AbstractAttribute::isValidIRPositionForInit
+  static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
+    if (!IRP.getAssociatedType()->isPointerTy())
+      return false;
+    return AbstractAttribute::isValidIRPositionForInit(A, IRP);
+  }
+
+  /// Create an abstract attribute view for the position \p IRP.
+  static AAInvariantLoadPointer &createForPosition(const IRPosition &IRP,
+                                                   Attributor &A);
+
+  /// Return true if the pointer's contents are known to remain invariant.
+  virtual bool isKnownInvariant() const = 0;
+
+  /// Return true if the pointer's contents are assumed to remain invariant.
+  virtual bool isAssumedInvariant() const = 0;
+
+  /// See AbstractAttribute::getName().
+  StringRef getName() const override { return "AAInvariantLoadPointer"; }
+
+  /// See AbstractAttribute::getIdAddr().
+  const char *getIdAddr() const override { return &ID; }
+
+  /// This function should return true if the type of the \p AA is
+  /// AAInvariantLoadPointer
+  static bool classof(const AbstractAttribute *AA) {
+    return (AA->getIdAddr() == &ID);
+  }
+
+  /// Unique ID (due to the unique address).
+  static const char ID;
+};
+
 /// An abstract interface for address space information.
 struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> {
   AAAddressSpace(const IRPosition &IRP, Attributor &A)
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index cbdbf9ae1494d..1dc576656d12a 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -3620,6 +3620,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
       if (SimplifyAllLoads)
         getAssumedSimplified(IRPosition::value(I), nullptr,
                              UsedAssumedInformation, AA::Intraprocedural);
+      getOrCreateAAFor<AAInvariantLoadPointer>(
+          IRPosition::value(*LI->getPointerOperand()));
       getOrCreateAAFor<AAAddressSpace>(
           IRPosition::value(*LI->getPointerOperand()));
     } else {
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 470c5308edca4..f0647747d6c7f 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -191,6 +191,7 @@ PIPE_OPERATOR(AAInterFnReachability)
 PIPE_OPERATOR(AAPointerInfo)
 PIPE_OPERATOR(AAAssumptionInfo)
 PIPE_OPERATOR(AAUnderlyingObjects)
+PIPE_OPERATOR(AAInvariantLoadPointer)
 PIPE_OPERATOR(AAAddressSpace)
 PIPE_OPERATOR(AAAllocationInfo)
 PIPE_OPERATOR(AAIndirectCallInfo)
@@ -12534,6 +12535,248 @@ struct AAIndirectCallInfoCallSite : public AAIndirectCallInfo {
 };
 } // namespace
 
+/// --------------------- Invariant Load Pointer -------------------------------
+namespace {
+
+struct AAInvariantLoadPointerImpl
+    : public StateWrapper<BitIntegerState<uint8_t, 7>, AAInvariantLoadPointer,
+                          uint8_t> {
+  // load invariance is implied by, but not equivalent to IS_NOALIAS |
+  // IS_READONLY, as load invariance is also implied by all underlying objects
+  // being load invariant.
+  //
+  // IS_INVARIANT is set to indicate that the contents of the pointer are
+  // *known* to be invariant.
+  enum {
+    IS_INVARIANT = 1 << 0,
+    IS_NOALIAS = 1 << 1,
+    IS_READONLY = 1 << 2,
+  };
+  static_assert(getBestState() == (IS_INVARIANT | IS_NOALIAS | IS_READONLY),
+                "Unexpected best state!");
+
+  using Base = StateWrapper<BitIntegerState<uint8_t, 7>, AAInvariantLoadPointer,
+                            uint8_t>;
+
+  // the BitIntegerState is optimistic about noalias and readonly, but
+  // pessimistic about invariance
+  AAInvariantLoadPointerImpl(const IRPosition &IRP, Attributor &A)
+      : Base(IRP, IS_NOALIAS | IS_READONLY) {}
+
+  void initialize(Attributor &A) final {
+    // conservatively assume that the pointer's contents are not invariant,
+    // until proven otherwise.
+    removeAssumedBits(IS_INVARIANT);
+  }
+
+  bool isKnownInvariant() const final {
+    return isKnown(IS_INVARIANT) || isKnown(IS_NOALIAS | IS_READONLY);
+  }
+
+  bool isAssumedInvariant() const final {
+    return isAssumed(IS_INVARIANT) || isAssumed(IS_NOALIAS | IS_READONLY);
+  }
+
+  ChangeStatus updateImpl(Attributor &A) override {
+    if (isKnownInvariant())
+      return ChangeStatus::UNCHANGED;
+
+    ChangeStatus Changed = ChangeStatus::UNCHANGED;
+
+    Changed |= updateNoAlias(A);
+    Changed |= updateReadOnly(A);
+
+    bool UsedAssumedInformation = false;
+    const auto IsInvariantLoadIfPointer = [&](const Value &V) {
+      if (!V.getType()->isPointerTy())
+        return true;
+      const auto *IsInvariantLoadPointer =
+          A.getOrCreateAAFor<AAInvariantLoadPointer>(IRPosition::value(V), this,
+                                                     DepClassTy::REQUIRED);
+      if (IsInvariantLoadPointer->isKnownInvariant())
+        return true;
+      if (!IsInvariantLoadPointer->isAssumedInvariant())
+        return false;
+
+      UsedAssumedInformation = true;
+      return true;
+    };
+
+    const auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(
+        getIRPosition(), this, DepClassTy::REQUIRED);
+
+    if (!AUO->forallUnderlyingObjects(IsInvariantLoadIfPointer)) {
+      removeAssumedBits(IS_INVARIANT);
+      return ChangeStatus::CHANGED;
+    }
+
+    if (!UsedAssumedInformation) {
+      // pointer is known (not assumed) to be invariant
+      addKnownBits(IS_INVARIANT);
+      return ChangeStatus::CHANGED;
+    }
+
+    return Changed;
+  }
+
+  ChangeStatus manifest(Attributor &A) override {
+    if (!isKnownInvariant())
+      return ChangeStatus::UNCHANGED;
+
+    ChangeStatus Changed = ChangeStatus::UNCHANGED;
+    Value *Ptr = &getAssociatedValue();
+    const auto TagInvariantLoads = [&](const Use &U, bool &) {
+      if (U.get() != Ptr)
+        return true;
+      auto *I = dyn_cast<Instruction>(U.getUser());
+      if (!I)
+        return true;
+
+      // Ensure that we are only changing uses from the corresponding callgraph
+      // SCC in the case that the AA isn't run on the entire module
+      if (!A.isRunOn(I->getFunction()))
+        return true;
+
+      if (I->hasMetadata(LLVMContext::MD_invariant_load))
+        return true;
+
+      if (auto *LI = dyn_cast<LoadInst>(I)) {
+        if (LI->isVolatile() || LI->isAtomic())
+          return true;
+
+        LI->setMetadata(LLVMContext::MD_invariant_load,
+                        MDNode::get(LI->getContext(), {}));
+        Changed = ChangeStatus::CHANGED;
+      }
+      return true;
+    };
+
+    (void)A.checkForAllUses(TagInvariantLoads, *this, *Ptr);
+    return Changed;
+  }
+
+  /// See AbstractAttribute::getAsStr().
+  const std::string getAsStr(Attributor *) const override {
+    std::string Str;
+    raw_string_ostream OS(Str);
+    OS << "load invariant pointer: " << isKnown() << '\n';
+    return Str;
+  }
+
+  /// See AbstractAttribute::trackStatistics().
+  void trackStatistics() const override {}
+
+protected:
+  ChangeStatus updateNoAlias(Attributor &A) {
+    if (isKnown(IS_NOALIAS) || !isAssumed(IS_NOALIAS))
+      return ChangeStatus::UNCHANGED;
+
+    const auto *ANoAlias = A.getOrCreateAAFor<AANoAlias>(getIRPosition(), this,
+                                                         DepClassTy::REQUIRED);
+    if (!ANoAlias)
+      return tryInferNoAlias(A);
+
+    if (!ANoAlias->isAssumedNoAlias()) {
+      removeAssumedBits(IS_NOALIAS);
+      return ChangeStatus::CHANGED;
+    }
+    if (ANoAlias->isKnownNoAlias())
+      addKnownBits(IS_NOALIAS);
+
+    return ChangeStatus::UNCHANGED;
+  }
+
+  /// Fallback method if updateNoAlias fails to infer noalias information from
+  /// AANoAlias.
+  virtual ChangeStatus tryInferNoAlias(Attributor &A) {
+    return ChangeStatus::UNCHANGED;
+  }
+
+  ChangeStatus updateReadOnly(Attributor &A) {
+    if (isKnown(IS_READONLY) || !isAssumed(IS_READONLY))
+      return ChangeStatus::UNCHANGED;
+
+    // AAMemoryBehavior may crash if value is global
+    if (!getAssociatedFunction())
+      return tryInferReadOnly(A);
+
+    const auto *AMemoryBehavior = A.getOrCreateAAFor<AAMemoryBehavior>(
+        getIRPosition(), this, DepClassTy::REQUIRED);
+    if (!AMemoryBehavior)
+      return tryInferReadOnly(A);
+
+    if (!AMemoryBehavior->isAssumedReadOnly()) {
+      removeAssumedBits(IS_READONLY);
+      return ChangeStatus::CHANGED;
+    }
+    if (AMemoryBehavior->isKnownReadOnly())
+      addKnownBits(IS_READONLY);
+
+    return ChangeStatus::UNCHANGED;
+  }
+
+  /// Fallback method if updateReadOnly fails to infer readonly information from
+  /// AAMemoryBehavior.
+  virtual ChangeStatus tryInferReadOnly(Attributor &A) {
+    return ChangeStatus::UNCHANGED;
+  }
+};
+
+struct AAInvariantLoadPointerFloating final : AAInvariantLoadPointerImpl {
+  AAInvariantLoadPointerFloating(const IRPosition &IRP, Attributor &A)
+      : AAInvariantLoadPointerImpl(IRP, A) {}
+};
+
+struct AAInvariantLoadPointerReturned final : AAInvariantLoadPointerImpl {
+  AAInvariantLoadPointerReturned(const IRPosition &IRP, Attributor &A)
+      : AAInvariantLoadPointerImpl(IRP, A) {}
+};
+
+struct AAInvariantLoadPointerCallSiteReturned final
+    : AAInvariantLoadPointerImpl {
+  AAInvariantLoadPointerCallSiteReturned(const IRPosition &IRP, Attributor &A)
+      : AAInvariantLoadPointerImpl(IRP, A) {}
+};
+
+struct AAInvariantLoadPointerArgument final : AAInvariantLoadPointerImpl {
+  AAInvariantLoadPointerArgument(const IRPosition &IRP, Attributor &A)
+      : AAInvariantLoadPointerImpl(IRP, A) {}
+
+protected:
+  ChangeStatus tryInferNoAlias(Attributor &A) override {
+    const auto *Arg = getAssociatedArgument();
+    if (Arg->hasNoAliasAttr()) {
+      addKnownBits(IS_NOALIAS);
+      return ChangeStatus::UNCHANGED;
+    }
+
+    // noalias information is not provided, and cannot be inferred from
+    // AANoAlias
+    removeAssumedBits(IS_NOALIAS);
+    return ChangeStatus::CHANGED;
+  }
+
+  ChangeStatus tryInferReadOnly(Attributor &A) override {
+    const auto *Arg = getAssociatedArgument();
+    if (Arg->onlyReadsMemory()) {
+      addKnownBits(IS_READONLY);
+      return ChangeStatus::UNCHANGED;
+    }
+
+    // readonly information is not provided, and cannot be inferred from
+    // AAMemoryBehavior
+    removeAssumedBits(IS_READONLY);
+    return ChangeStatus::CHANGED;
+  }
+};
+
+struct AAInvariantLoadPointerCallSiteArgument final
+    : AAInvariantLoadPointerImpl {
+  AAInvariantLoadPointerCallSiteArgument(const IRPosition &IRP, Attributor &A)
+      : AAInvariantLoadPointerImpl(IRP, A) {}
+};
+} // namespace
+
 /// ------------------------ Address Space  ------------------------------------
 namespace {
 
@@ -13031,6 +13274,7 @@ const char AAInterFnReachability::ID = 0;
 const char AAPointerInfo::ID = 0;
 const char AAAssumptionInfo::ID = 0;
 const char AAUnderlyingObjects::ID = 0;
+const char AAInvariantLoadPointer::ID = 0;
 const char AAAddressSpace::ID = 0;
 const char AAAllocationInfo::ID = 0;
 const char AAIndirectCallInfo::ID = 0;
@@ -13165,6 +13409,7 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialValues)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFPClass)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPointerInfo)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAInvariantLoadPointer)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAddressSpace)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAllocationInfo)
 
diff --git a/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll b/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll
index f04ac4d73340f..9e58a35107491 100644
--- a/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll
@@ -10,7 +10,7 @@ define i8 @select_offsets_simplifiable_1(i1 %cnd1, i1 %cnd2) {
 ; CHECK-LABEL: define {{[^@]+}}@select_offsets_simplifiable_1
 ; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[BYTES:%.*]] = call ptr @calloc(i64 noundef 1024, i64 noundef 1)
+; CHECK-NEXT:    [[BYTES:%.*]] = call noalias ptr @calloc(i64 noundef 1024, i64 noundef 1)
 ; CHECK-NEXT:    [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 23
 ; CHECK-NEXT:    store i8 23, ptr [[GEP23]], align 4
 ; CHECK-NEXT:    [[GEP29:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 29
@@ -190,7 +190,7 @@ define i8 @select_offsets_not_simplifiable_3(i1 %cnd1, i1 %cnd2) {
 ; CHECK-LABEL: define {{[^@]+}}@select_offsets_not_simplifiable_3
 ; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[BYTES:%.*]] = call ptr @calloc(i64 noundef 1024, i64 noundef 1)
+; CHECK-NEXT:    [[BYTES:%.*]] = call noalias ptr @calloc(i64 noundef 1024, i64 noundef 1)
 ; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29
 ; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7
 ; CHECK-NEXT:    [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 [[SEL1]]
@@ -214,7 +214,7 @@ define i8 @select_offsets_not_simplifiable_4(i1 %cnd1, i1 %cnd2) {
 ; CHECK-LABEL: define {{[^@]+}}@select_offsets_not_simplifiable_4
 ; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[BYTES:%.*]] = call ptr @calloc(i64 noundef 1024, i64 noundef 1)
+; CHECK-NEXT:    [[BYTES:%.*]] = call noalias ptr @calloc(i64 noundef 1024, i64 noundef 1)
 ; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29
 ; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7
 ; CHECK-NEXT:    [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 [[SEL1]]
@@ -445,7 +445,7 @@ define i8 @phi_gep_not_simplifiable_2(i1 %cnd1, i1 %cnd2) {
 ; CHECK-LABEL: define {{[^@]+}}@phi_gep_not_simplifiable_2
 ; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[BYTES:%.*]] = call ptr @calloc(i64 noundef 1024, i64 noundef 1)
+; CHECK-NEXT:    [[BYTES:%.*]] = call noalias ptr @calloc(i64 noundef 1024, i64 noundef 1)
 ; CHECK-NEXT:    [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 23
 ; CHECK-NEXT:    br i1 [[CND1]], label [[THEN:%.*]], label [[ELSE:%.*]]
 ; CHECK:       then:
diff --git a/llvm/test/Transforms/Attributor/tag-invariant-loads.ll b/llvm/test/Transforms/Attributor/tag-invariant-loads.ll
new file mode 100644
index 0000000000000..6df07a0d68bee
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/tag-invariant-loads.ll
@@ -0,0 +1,220 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=attributor %s -S | FileCheck %s
+
+@G = global i32 zeroinitializer, align 4
+
+declare ptr @get_ptr()
+declare noalias ptr @get_noalias_ptr()
+
+define i32 @test_plain(ptr %ptr) {
+; CHECK-LABEL: define i32 @test_plain(
+; CHECK-SAME: ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
+  %val = load i32, ptr %ptr, align 4
+  ret i32 %val
+}
+
+define i32 @test_noalias_ptr(ptr noalias %ptr) {
+; CHECK-LABEL: define i32 @test_noalias_ptr(
+; CHECK-SAME: ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0:![0-9]+]]
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
+  %val = load i32, ptr %ptr, align 4
+  ret i32 %val
+}
+
+define i32 @test_swap(ptr noalias %ptr, i32 %write) {
+; CHECK-LABEL: define i32 @test_swap(
+; CHECK-SAME: ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[PTR:%.*]], i32 [[WRITE:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-NEXT:    store i32 [[WRITE]], ptr [[PTR]], align 4
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
+  %val = load i32, ptr %ptr, align 4
+  store i32 %write, ptr %ptr, align 4
+  ret i32 %val
+}
+
+define i32 @test_volatile_load(ptr noalias %ptr) {
+; CHECK-LABEL: define i32 @test_volatile_load(
+; CHECK-SAME: ptr noalias nofree noundef align 4 [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:    [[VAL:%.*]] = load volatile i32, ptr [[PTR]], align 4
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
+  %val = load volatile i32, ptr %ptr, align 4
+  ret i32 %val
+}
+
+define i32 @test_atomic_load(ptr noalias %ptr) {
+; CHECK-LABEL: define i32 @test_atomic_load(
+; CHECK-SAME: ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VAL:%.*]] = load atomic i32, ptr [[PTR]] unordered, align 4
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
+  %val = load atomic i32, ptr %ptr unordered, align 4
+  ret i32 %val
+}
+
+define i32 @test_atomic_volatile_load(ptr noalias %ptr) {
+; CHECK-LABEL: define i32 @test_atomic_volatile_load(
+; CHECK-SAME: ptr noalias nofree noundef align 4 [[PTR:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[VAL:%.*]] = load atomic volatile i32, ptr [[PTR]] unordered, align 4
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
+  %val = load atomic volatile i32, ptr %ptr unordered, align 4
+  ret i32 %val
+}
+
+define i32 @test_global() {
+; CHECK-LABEL: define i32 @test_global(
+; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr @G, align 4
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
+  %val = load i32, ptr @G, align 4
+  ret i32 %val
+}
+
+define internal i32 @test_internal_noalias_load(ptr %ptr) {
+; CHECK-LABEL: define internal i32 @test_internal_noalias_load(
+; CHECK-SAME: ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0]]
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
+  %val = load i32, ptr %ptr, align 4
+  ret i32 %val
+}
+
+define i32 @test_call_internal_noalias(ptr noalias %ptr) {
+; CHECK-LABEL: define i32 @test_call_internal_noalias(
+; CHECK-SAME: ptr noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VAL:%.*]] = call i32 @test_internal_noalias_load(ptr noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR4:[0-9]+]]
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
+  %val = call i32 @test_internal_noalias_load(ptr %ptr)
+  ret i32 %val
+}
+
+define internal i32 @test_internal_load(ptr %ptr) {
+; CHECK-LABEL: define internal i32 @test_internal_load(
+; CHECK-SAME: ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
+  %val = load i32, ptr %ptr, align 4
+  ret i32 %val
+}
+
+define i32 @test_call_internal(ptr %ptr) {
+; CHECK-LABEL: define i32 @test_call_internal(
+; CHECK-SAME: ptr nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VAL:%.*]] = call i32 @test_internal_load(ptr nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR4]]
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
+  %val = call i32 @test_internal_load(ptr %ptr)
+  ret i32 %val
+}
+
+define i32 @test_call_ptr() {
+; CHECK-LABEL: define i32 @test_call_ptr() {
+; CHECK-NEXT:    [[PTR:%.*]] = call ptr @get_ptr()
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
+  %ptr = call ptr @get_ptr()
+  %val = load i32, ptr %ptr, align 4
+  ret i32 %val
+}
+
+define i32 @test_call_noalias_ptr() {
+; CHECK-LABEL: define i32 @test_call_noalias_ptr() {
+; CHECK-NEXT:    [[PTR:%.*]] = call noalias ptr @get_noalias_ptr()
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0]]
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
+  %ptr = call ptr @get_noalias_ptr()
+  %val = load i32, ptr %ptr, align 4
+  ret i32 %val
+}
+
+define i32 @test_selected_load(i1 %cond, ptr noalias %ptr.true, ptr noalias %ptr.false) {
+; CHECK-LABEL: define i32 @test_selected_load(
+; CHECK-SAME: i1 [[COND:%.*]], ptr noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[PTR:%.*]] = select i1 [[COND]], ptr [[PTR_TRUE]], ptr [[PTR_FALSE]]
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0]]
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
+  %ptr = select i1 %cond, ptr %ptr.true, ptr %ptr.false
+  %val = load i32, ptr %ptr, align 4
+  ret i32 %val
+}
+
+define i32 @test_selected_load_partial_noalias(i1 %cond, ptr noalias %ptr.true, ptr %ptr.false) {
+; CHECK-LABEL: define i32 @test_selected_load_partial_noalias(
+; CHECK-SAME: i1 [[COND:%.*]], ptr noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[PTR:%.*]] = select i1 [[COND]], ptr [[PTR_TRUE]], ptr [[PTR_FALSE]]
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
+  %ptr = select i1 %cond, ptr %ptr.true, ptr %ptr.false
+  %val = load i32, ptr %ptr, align 4
+  ret i32 %val
+}
+
+define i32 @test_branch_load(i1 %cond, ptr noalias %ptr.true, ptr noalias %ptr.false) {
+; CHECK-LABEL: define i32 @test_branch_load(
+; CHECK-SAME: i1 noundef [[COND:%.*]], ptr noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]]
+; CHECK:       [[TRUE]]:
+; CHECK-NEXT:    br label %[[FINISH:.*]]
+; CHECK:       [[FALSE]]:
+; CHECK-NEXT:    br label %[[FINISH]]
+; CHECK:       [[FINISH]]:
+; CHECK-NEXT:    [[PTR:%.*]] = phi ptr [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ]
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0]]
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
+entry:
+  br i1 %cond, label %true, label %false
+true:
+  br label %finish
+false:
+  br label %finish
+finish:
+  %ptr = phi ptr [ %ptr.true, %true ], [ %ptr.false, %false ]
+  %val = load i32, ptr %ptr, align 4
+  ret i32 %val
+}
+
+define i32 @test_branch_load_partial_noalias(i1 %cond, ptr noalias %ptr.true, ptr %ptr.false) {
+; CHECK-LABEL: define i32 @test_branch_load_partial_noalias(
+; CHECK-SAME: i1 noundef [[COND:%.*]], ptr noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]]
+; CHECK:       [[TRUE]]:
+; CHECK-NEXT:    br label %[[FINISH:.*]]
+; CHECK:       [[FALSE]]:
+; CHECK-NEXT:    br label %[[FINISH]]
+; CHECK:       [[FINISH]]:
+; CHECK-NEXT:    [[PTR:%.*]] = phi ptr [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ]
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
+entry:
+  br i1 %cond, label %true, label %false
+true:
+  br label %finish
+false:
+  br label %finish
+finish:
+  %ptr = phi ptr [ %ptr.true, %true ], [ %ptr.false, %false ]
+  %val = load i32, ptr %ptr, align 4
+  ret i32 %val
+}
+;.
+; CHECK: [[META0]] = !{}
+;.

>From e095a93c82fad7530d152b1888131feb1d1133f4 Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <zgoldtho at ualberta.ca>
Date: Wed, 28 May 2025 12:42:15 -0500
Subject: [PATCH 2/8] Incorporated feedback

---
 llvm/include/llvm/Transforms/IPO/Attributor.h |   1 +
 .../Transforms/IPO/AttributorAttributes.cpp   | 128 ++++---
 .../multiple-offsets-pointer-info.ll          |   8 +-
 .../Attributor/tag-invariant-loads.ll         | 357 ++++++++++--------
 4 files changed, 268 insertions(+), 226 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 53fa7a04dc5b5..38996bb051328 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -6298,6 +6298,7 @@ struct AAInvariantLoadPointer : public AbstractAttribute {
   static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
     if (!IRP.getAssociatedType()->isPointerTy())
       return false;
+
     return AbstractAttribute::isValidIRPositionForInit(A, IRP);
   }
 
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index f0647747d6c7f..dec36b3e7dcb3 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12579,7 +12579,7 @@ struct AAInvariantLoadPointerImpl
 
   ChangeStatus updateImpl(Attributor &A) override {
     if (isKnownInvariant())
-      return ChangeStatus::UNCHANGED;
+      return indicateOptimisticFixpoint();
 
     ChangeStatus Changed = ChangeStatus::UNCHANGED;
 
@@ -12605,15 +12605,13 @@ struct AAInvariantLoadPointerImpl
     const auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(
         getIRPosition(), this, DepClassTy::REQUIRED);
 
-    if (!AUO->forallUnderlyingObjects(IsInvariantLoadIfPointer)) {
-      removeAssumedBits(IS_INVARIANT);
-      return ChangeStatus::CHANGED;
-    }
+    if (!AUO->forallUnderlyingObjects(IsInvariantLoadIfPointer))
+      return indicatePessimisticFixpoint();
 
     if (!UsedAssumedInformation) {
       // pointer is known (not assumed) to be invariant
       addKnownBits(IS_INVARIANT);
-      return ChangeStatus::CHANGED;
+      return indicateOptimisticFixpoint() | Changed;
     }
 
     return Changed;
@@ -12671,24 +12669,44 @@ struct AAInvariantLoadPointerImpl
     if (isKnown(IS_NOALIAS) || !isAssumed(IS_NOALIAS))
       return ChangeStatus::UNCHANGED;
 
-    const auto *ANoAlias = A.getOrCreateAAFor<AANoAlias>(getIRPosition(), this,
-                                                         DepClassTy::REQUIRED);
-    if (!ANoAlias)
-      return tryInferNoAlias(A);
+    const auto *F = getAssociatedFunction();
 
-    if (!ANoAlias->isAssumedNoAlias()) {
+    if (F && isCallableCC(F->getCallingConv())) {
+      // program-wide alias information cannot be inferred
       removeAssumedBits(IS_NOALIAS);
       return ChangeStatus::CHANGED;
     }
-    if (ANoAlias->isKnownNoAlias())
-      addKnownBits(IS_NOALIAS);
 
-    return ChangeStatus::UNCHANGED;
-  }
+    // try to use AANoAlias
+    if (const auto *ANoAlias = A.getOrCreateAAFor<AANoAlias>(
+            getIRPosition(), this, DepClassTy::REQUIRED)) {
+      if (ANoAlias->isKnownNoAlias()) {
+        addKnownBits(IS_NOALIAS);
+        return ChangeStatus::UNCHANGED;
+      }
+
+      if (!ANoAlias->isAssumedNoAlias()) {
+        removeAssumedBits(IS_NOALIAS);
+        return ChangeStatus::CHANGED;
+      }
+
+      return ChangeStatus::UNCHANGED;
+    }
+
+    // if the function is not callable, try to infer noalias from argument
+    // attribute, since it is applicable for the duration of the function
+    if (const auto *Arg = getAssociatedArgument()) {
+      if (Arg->hasNoAliasAttr()) {
+        addKnownBits(IS_NOALIAS);
+        return ChangeStatus::UNCHANGED;
+      }
+
+      // noalias information is not provided, and cannot be inferred,
+      // so we conservatively assume the pointer aliases.
+      removeAssumedBits(IS_NOALIAS);
+      return ChangeStatus::CHANGED;
+    }
 
-  /// Fallback method if updateNoAlias fails to infer noalias information from
-  /// AANoAlias.
-  virtual ChangeStatus tryInferNoAlias(Attributor &A) {
     return ChangeStatus::UNCHANGED;
   }
 
@@ -12696,28 +12714,45 @@ struct AAInvariantLoadPointerImpl
     if (isKnown(IS_READONLY) || !isAssumed(IS_READONLY))
       return ChangeStatus::UNCHANGED;
 
-    // AAMemoryBehavior may crash if value is global
-    if (!getAssociatedFunction())
-      return tryInferReadOnly(A);
+    const auto *F = getAssociatedFunction();
 
-    const auto *AMemoryBehavior = A.getOrCreateAAFor<AAMemoryBehavior>(
-        getIRPosition(), this, DepClassTy::REQUIRED);
-    if (!AMemoryBehavior)
-      return tryInferReadOnly(A);
+    if (!F)
+      return ChangeStatus::UNCHANGED;
 
-    if (!AMemoryBehavior->isAssumedReadOnly()) {
+    if (isCallableCC(F->getCallingConv())) {
+      // readonly attribute is only useful if applicable program-wide
       removeAssumedBits(IS_READONLY);
       return ChangeStatus::CHANGED;
     }
-    if (AMemoryBehavior->isKnownReadOnly())
-      addKnownBits(IS_READONLY);
 
-    return ChangeStatus::UNCHANGED;
-  }
+    // try to use AAMemoryBehavior to infer readonly attribute
+    if (const auto *AMemoryBehavior = A.getOrCreateAAFor<AAMemoryBehavior>(
+            getIRPosition(), this, DepClassTy::REQUIRED)) {
+      if (!AMemoryBehavior->isAssumedReadOnly()) {
+        removeAssumedBits(IS_READONLY);
+        return ChangeStatus::CHANGED;
+      }
+
+      if (AMemoryBehavior->isKnownReadOnly()) {
+        addKnownBits(IS_READONLY);
+        return ChangeStatus::UNCHANGED;
+      }
+
+      return ChangeStatus::UNCHANGED;
+    }
+
+    if (const auto *Arg = getAssociatedArgument()) {
+      if (Arg->onlyReadsMemory()) {
+        addKnownBits(IS_READONLY);
+        return ChangeStatus::UNCHANGED;
+      }
+
+      // readonly information is not provided, and cannot be inferred from
+      // AAMemoryBehavior
+      removeAssumedBits(IS_READONLY);
+      return ChangeStatus::CHANGED;
+    }
 
-  /// Fallback method if updateReadOnly fails to infer readonly information from
-  /// AAMemoryBehavior.
-  virtual ChangeStatus tryInferReadOnly(Attributor &A) {
     return ChangeStatus::UNCHANGED;
   }
 };
@@ -12741,33 +12776,6 @@ struct AAInvariantLoadPointerCallSiteReturned final
 struct AAInvariantLoadPointerArgument final : AAInvariantLoadPointerImpl {
   AAInvariantLoadPointerArgument(const IRPosition &IRP, Attributor &A)
       : AAInvariantLoadPointerImpl(IRP, A) {}
-
-protected:
-  ChangeStatus tryInferNoAlias(Attributor &A) override {
-    const auto *Arg = getAssociatedArgument();
-    if (Arg->hasNoAliasAttr()) {
-      addKnownBits(IS_NOALIAS);
-      return ChangeStatus::UNCHANGED;
-    }
-
-    // noalias information is not provided, and cannot be inferred from
-    // AANoAlias
-    removeAssumedBits(IS_NOALIAS);
-    return ChangeStatus::CHANGED;
-  }
-
-  ChangeStatus tryInferReadOnly(Attributor &A) override {
-    const auto *Arg = getAssociatedArgument();
-    if (Arg->onlyReadsMemory()) {
-      addKnownBits(IS_READONLY);
-      return ChangeStatus::UNCHANGED;
-    }
-
-    // readonly information is not provided, and cannot be inferred from
-    // AAMemoryBehavior
-    removeAssumedBits(IS_READONLY);
-    return ChangeStatus::CHANGED;
-  }
 };
 
 struct AAInvariantLoadPointerCallSiteArgument final
diff --git a/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll b/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll
index 9e58a35107491..f04ac4d73340f 100644
--- a/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll
@@ -10,7 +10,7 @@ define i8 @select_offsets_simplifiable_1(i1 %cnd1, i1 %cnd2) {
 ; CHECK-LABEL: define {{[^@]+}}@select_offsets_simplifiable_1
 ; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[BYTES:%.*]] = call noalias ptr @calloc(i64 noundef 1024, i64 noundef 1)
+; CHECK-NEXT:    [[BYTES:%.*]] = call ptr @calloc(i64 noundef 1024, i64 noundef 1)
 ; CHECK-NEXT:    [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 23
 ; CHECK-NEXT:    store i8 23, ptr [[GEP23]], align 4
 ; CHECK-NEXT:    [[GEP29:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 29
@@ -190,7 +190,7 @@ define i8 @select_offsets_not_simplifiable_3(i1 %cnd1, i1 %cnd2) {
 ; CHECK-LABEL: define {{[^@]+}}@select_offsets_not_simplifiable_3
 ; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[BYTES:%.*]] = call noalias ptr @calloc(i64 noundef 1024, i64 noundef 1)
+; CHECK-NEXT:    [[BYTES:%.*]] = call ptr @calloc(i64 noundef 1024, i64 noundef 1)
 ; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29
 ; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7
 ; CHECK-NEXT:    [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 [[SEL1]]
@@ -214,7 +214,7 @@ define i8 @select_offsets_not_simplifiable_4(i1 %cnd1, i1 %cnd2) {
 ; CHECK-LABEL: define {{[^@]+}}@select_offsets_not_simplifiable_4
 ; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[BYTES:%.*]] = call noalias ptr @calloc(i64 noundef 1024, i64 noundef 1)
+; CHECK-NEXT:    [[BYTES:%.*]] = call ptr @calloc(i64 noundef 1024, i64 noundef 1)
 ; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29
 ; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7
 ; CHECK-NEXT:    [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 [[SEL1]]
@@ -445,7 +445,7 @@ define i8 @phi_gep_not_simplifiable_2(i1 %cnd1, i1 %cnd2) {
 ; CHECK-LABEL: define {{[^@]+}}@phi_gep_not_simplifiable_2
 ; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[BYTES:%.*]] = call noalias ptr @calloc(i64 noundef 1024, i64 noundef 1)
+; CHECK-NEXT:    [[BYTES:%.*]] = call ptr @calloc(i64 noundef 1024, i64 noundef 1)
 ; CHECK-NEXT:    [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 23
 ; CHECK-NEXT:    br i1 [[CND1]], label [[THEN:%.*]], label [[ELSE:%.*]]
 ; CHECK:       then:
diff --git a/llvm/test/Transforms/Attributor/tag-invariant-loads.ll b/llvm/test/Transforms/Attributor/tag-invariant-loads.ll
index 6df07a0d68bee..02c304822bcb8 100644
--- a/llvm/test/Transforms/Attributor/tag-invariant-loads.ll
+++ b/llvm/test/Transforms/Attributor/tag-invariant-loads.ll
@@ -1,220 +1,253 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -passes=attributor %s -S | FileCheck %s
+; RUN: opt -mtriple=amdgcn-amd-hsa -passes=attributor %s -S | FileCheck %s --check-prefix=AMDGCN
 
-@G = global i32 zeroinitializer, align 4
+@G = addrspace(1) global i32 zeroinitializer, align 4
+declare void @clobber(i32)
+declare ptr addrspace(1) @get_ptr()
+declare noalias ptr addrspace(1) @get_noalias_ptr()
 
-declare ptr @get_ptr()
-declare noalias ptr @get_noalias_ptr()
-
-define i32 @test_plain(ptr %ptr) {
-; CHECK-LABEL: define i32 @test_plain(
-; CHECK-SAME: ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
-; CHECK-NEXT:    ret i32 [[VAL]]
+define void @test_nonkernel(ptr addrspace(1) noalias %ptr) {
+; AMDGCN-LABEL: define void @test_nonkernel(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) {
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT:    ret void
 ;
-  %val = load i32, ptr %ptr, align 4
-  ret i32 %val
+  %val = load i32, ptr addrspace(1) %ptr, align 4
+  call void @clobber(i32 %val)
+  ret void
 }
 
-define i32 @test_noalias_ptr(ptr noalias %ptr) {
-; CHECK-LABEL: define i32 @test_noalias_ptr(
-; CHECK-SAME: ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0:![0-9]+]]
-; CHECK-NEXT:    ret i32 [[VAL]]
+define amdgpu_kernel void @test_plain(ptr addrspace(1) %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_plain(
+; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) {
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT:    ret void
 ;
-  %val = load i32, ptr %ptr, align 4
-  ret i32 %val
+  %val = load i32, ptr addrspace(1) %ptr, align 4
+  call void @clobber(i32 %val)
+  ret void
 }
 
-define i32 @test_swap(ptr noalias %ptr, i32 %write) {
-; CHECK-LABEL: define i32 @test_swap(
-; CHECK-SAME: ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[PTR:%.*]], i32 [[WRITE:%.*]]) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
-; CHECK-NEXT:    store i32 [[WRITE]], ptr [[PTR]], align 4
-; CHECK-NEXT:    ret i32 [[VAL]]
+define amdgpu_kernel void @test_noalias_ptr(ptr addrspace(1) noalias %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_noalias_ptr(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) {
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0:![0-9]+]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT:    ret void
 ;
-  %val = load i32, ptr %ptr, align 4
-  store i32 %write, ptr %ptr, align 4
-  ret i32 %val
+  %val = load i32, ptr addrspace(1) %ptr, align 4
+  call void @clobber(i32 %val)
+  ret void
 }
 
-define i32 @test_volatile_load(ptr noalias %ptr) {
-; CHECK-LABEL: define i32 @test_volatile_load(
-; CHECK-SAME: ptr noalias nofree noundef align 4 [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
-; CHECK-NEXT:    [[VAL:%.*]] = load volatile i32, ptr [[PTR]], align 4
-; CHECK-NEXT:    ret i32 [[VAL]]
+define amdgpu_kernel void @test_swap(ptr addrspace(1) noalias %ptr, i32 inreg %swap) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_swap(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[SWAP:%.*]]) {
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT:    store i32 [[SWAP]], ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT:    ret void
 ;
-  %val = load volatile i32, ptr %ptr, align 4
-  ret i32 %val
+  %val = load i32, ptr addrspace(1) %ptr, align 4
+  store i32 %swap, ptr addrspace(1) %ptr, align 4
+  call void @clobber(i32 %val)
+  ret void
 }
 
-define i32 @test_atomic_load(ptr noalias %ptr) {
-; CHECK-LABEL: define i32 @test_atomic_load(
-; CHECK-SAME: ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[VAL:%.*]] = load atomic i32, ptr [[PTR]] unordered, align 4
-; CHECK-NEXT:    ret i32 [[VAL]]
+define amdgpu_kernel void @test_volatile(ptr addrspace(1) noalias %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_volatile(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 [[PTR:%.*]]) {
+; AMDGCN-NEXT:    [[VAL:%.*]] = load volatile i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT:    ret void
 ;
-  %val = load atomic i32, ptr %ptr unordered, align 4
-  ret i32 %val
+  %val = load volatile i32, ptr addrspace(1) %ptr, align 4
+  call void @clobber(i32 %val)
+  ret void
 }
 
-define i32 @test_atomic_volatile_load(ptr noalias %ptr) {
-; CHECK-LABEL: define i32 @test_atomic_volatile_load(
-; CHECK-SAME: ptr noalias nofree noundef align 4 [[PTR:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[VAL:%.*]] = load atomic volatile i32, ptr [[PTR]] unordered, align 4
-; CHECK-NEXT:    ret i32 [[VAL]]
+define amdgpu_kernel void @test_atomic(ptr addrspace(1) noalias %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_atomic(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) {
+; AMDGCN-NEXT:    [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] unordered, align 4
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT:    ret void
 ;
-  %val = load atomic volatile i32, ptr %ptr unordered, align 4
-  ret i32 %val
+  %val = load atomic i32, ptr addrspace(1) %ptr unordered, align 4
+  call void @clobber(i32 %val)
+  ret void
 }
 
-define i32 @test_global() {
-; CHECK-LABEL: define i32 @test_global(
-; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
-; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr @G, align 4
-; CHECK-NEXT:    ret i32 [[VAL]]
+define amdgpu_kernel void @test_global() {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_global() {
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) @G, align 4
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT:    ret void
 ;
-  %val = load i32, ptr @G, align 4
-  ret i32 %val
+  %val = load i32, ptr addrspace(1) @G, align 4
+  call void @clobber(i32 %val)
+  ret void
 }
 
-define internal i32 @test_internal_noalias_load(ptr %ptr) {
-; CHECK-LABEL: define internal i32 @test_internal_noalias_load(
-; CHECK-SAME: ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0]]
-; CHECK-NEXT:    ret i32 [[VAL]]
+define internal i32 @test_internal_noalias_load(ptr addrspace(1) %ptr) {
+; AMDGCN-LABEL: define internal i32 @test_internal_noalias_load(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
+; AMDGCN-NEXT:    ret i32 [[VAL]]
 ;
-  %val = load i32, ptr %ptr, align 4
-  ret i32 %val
+  %val = load i32, ptr addrspace(1) %ptr, align 4
+  ret i32 %val
+}
+
+define amdgpu_kernel void @test_call_internal_noalias(ptr addrspace(1) noalias %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal_noalias(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) {
+; AMDGCN-NEXT:    [[VAL:%.*]] = call i32 @test_internal_noalias_load(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR1:[0-9]+]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT:    ret void
+;
+  %val = call i32 @test_internal_noalias_load(ptr addrspace(1) %ptr)
+  call void @clobber(i32 %val)
+  ret void
 }
 
-define i32 @test_call_internal_noalias(ptr noalias %ptr) {
-; CHECK-LABEL: define i32 @test_call_internal_noalias(
-; CHECK-SAME: ptr noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[VAL:%.*]] = call i32 @test_internal_noalias_load(ptr noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR4:[0-9]+]]
-; CHECK-NEXT:    ret i32 [[VAL]]
-;
-  %val = call i32 @test_internal_noalias_load(ptr %ptr)
-  ret i32 %val
-}
+define internal i32 @test_internal_load(ptr addrspace(1) %ptr) {
+; AMDGCN-LABEL: define internal i32 @test_internal_load(
+; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR0]] {
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT:    ret i32 [[VAL]]
+;
+  %val = load i32, ptr addrspace(1) %ptr, align 4
+  ret i32 %val
+}
 
-define internal i32 @test_internal_load(ptr %ptr) {
-; CHECK-LABEL: define internal i32 @test_internal_load(
-; CHECK-SAME: ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
-; CHECK-NEXT:    ret i32 [[VAL]]
-;
-  %val = load i32, ptr %ptr, align 4
-  ret i32 %val
-}
+define amdgpu_kernel void @test_call_internal(ptr addrspace(1) %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal(
+; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) {
+; AMDGCN-NEXT:    [[VAL:%.*]] = call i32 @test_internal_load(ptr addrspace(1) nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR1]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT:    ret void
+;
+  %val = call i32 @test_internal_load(ptr addrspace(1) %ptr)
+  call void @clobber(i32 %val)
+  ret void
+}
 
-define i32 @test_call_internal(ptr %ptr) {
-; CHECK-LABEL: define i32 @test_call_internal(
-; CHECK-SAME: ptr nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[VAL:%.*]] = call i32 @test_internal_load(ptr nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR4]]
-; CHECK-NEXT:    ret i32 [[VAL]]
-;
-  %val = call i32 @test_internal_load(ptr %ptr)
-  ret i32 %val
+define amdgpu_kernel void @test_call_ptr() {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_call_ptr() {
+; AMDGCN-NEXT:    [[PTR:%.*]] = call ptr addrspace(1) @get_ptr()
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT:    ret void
+;
+  %ptr = call ptr addrspace(1) @get_ptr()
+  %val = load i32, ptr addrspace(1) %ptr, align 4
+  call void @clobber(i32 %val)
+  ret void
 }
 
-define i32 @test_call_ptr() {
-; CHECK-LABEL: define i32 @test_call_ptr() {
-; CHECK-NEXT:    [[PTR:%.*]] = call ptr @get_ptr()
-; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
-; CHECK-NEXT:    ret i32 [[VAL]]
+define amdgpu_kernel void @test_call_noalias_ptr() {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_call_noalias_ptr() {
+; AMDGCN-NEXT:    [[PTR:%.*]] = call ptr addrspace(1) @get_noalias_ptr()
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT:    ret void
 ;
-  %ptr = call ptr @get_ptr()
-  %val = load i32, ptr %ptr, align 4
-  ret i32 %val
-}
-
-define i32 @test_call_noalias_ptr() {
-; CHECK-LABEL: define i32 @test_call_noalias_ptr() {
-; CHECK-NEXT:    [[PTR:%.*]] = call noalias ptr @get_noalias_ptr()
-; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0]]
-; CHECK-NEXT:    ret i32 [[VAL]]
-;
-  %ptr = call ptr @get_noalias_ptr()
-  %val = load i32, ptr %ptr, align 4
-  ret i32 %val
+  %ptr = call ptr addrspace(1) @get_noalias_ptr()
+  %val = load i32, ptr addrspace(1) %ptr, align 4
+  call void @clobber(i32 %val)
+  ret void
 }
 
-define i32 @test_selected_load(i1 %cond, ptr noalias %ptr.true, ptr noalias %ptr.false) {
-; CHECK-LABEL: define i32 @test_selected_load(
-; CHECK-SAME: i1 [[COND:%.*]], ptr noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[PTR:%.*]] = select i1 [[COND]], ptr [[PTR_TRUE]], ptr [[PTR_FALSE]]
-; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0]]
-; CHECK-NEXT:    ret i32 [[VAL]]
-;
-  %ptr = select i1 %cond, ptr %ptr.true, ptr %ptr.false
-  %val = load i32, ptr %ptr, align 4
-  ret i32 %val
+define amdgpu_kernel void @test_selected_load(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load(
+; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) {
+; AMDGCN-NEXT:    [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]]
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT:    ret void
+;
+  %ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false
+  %val = load i32, ptr addrspace(1) %ptr, align 4
+  call void @clobber(i32 %val)
+  ret void
 }
 
-define i32 @test_selected_load_partial_noalias(i1 %cond, ptr noalias %ptr.true, ptr %ptr.false) {
-; CHECK-LABEL: define i32 @test_selected_load_partial_noalias(
-; CHECK-SAME: i1 [[COND:%.*]], ptr noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[PTR:%.*]] = select i1 [[COND]], ptr [[PTR_TRUE]], ptr [[PTR_FALSE]]
-; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
-; CHECK-NEXT:    ret i32 [[VAL]]
+define amdgpu_kernel void @test_selected_load_partial_noalias(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) %ptr.false) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load_partial_noalias(
+; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) {
+; AMDGCN-NEXT:    [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]]
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT:    ret void
 ;
-  %ptr = select i1 %cond, ptr %ptr.true, ptr %ptr.false
-  %val = load i32, ptr %ptr, align 4
-  ret i32 %val
+  %ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false
+  %val = load i32, ptr addrspace(1) %ptr, align 4
+  call void @clobber(i32 %val)
+  ret void
 }
 
-define i32 @test_branch_load(i1 %cond, ptr noalias %ptr.true, ptr noalias %ptr.false) {
-; CHECK-LABEL: define i32 @test_branch_load(
-; CHECK-SAME: i1 noundef [[COND:%.*]], ptr noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]]
-; CHECK:       [[TRUE]]:
-; CHECK-NEXT:    br label %[[FINISH:.*]]
-; CHECK:       [[FALSE]]:
-; CHECK-NEXT:    br label %[[FINISH]]
-; CHECK:       [[FINISH]]:
-; CHECK-NEXT:    [[PTR:%.*]] = phi ptr [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ]
-; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0]]
-; CHECK-NEXT:    ret i32 [[VAL]]
+define amdgpu_kernel void @test_branch_load(i1 %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_branch_load(
+; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) {
+; AMDGCN-NEXT:  [[ENTRY:.*:]]
+; AMDGCN-NEXT:    br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]]
+; AMDGCN:       [[TRUE]]:
+; AMDGCN-NEXT:    call void @clobber(i32 noundef 1)
+; AMDGCN-NEXT:    br label %[[FINISH:.*]]
+; AMDGCN:       [[FALSE]]:
+; AMDGCN-NEXT:    br label %[[FINISH]]
+; AMDGCN:       [[FINISH]]:
+; AMDGCN-NEXT:    [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ]
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT:    ret void
 ;
 entry:
   br i1 %cond, label %true, label %false
 true:
+  call void @clobber(i32 1)
   br label %finish
 false:
   br label %finish
 finish:
-  %ptr = phi ptr [ %ptr.true, %true ], [ %ptr.false, %false ]
-  %val = load i32, ptr %ptr, align 4
-  ret i32 %val
-}
-
-define i32 @test_branch_load_partial_noalias(i1 %cond, ptr noalias %ptr.true, ptr %ptr.false) {
-; CHECK-LABEL: define i32 @test_branch_load_partial_noalias(
-; CHECK-SAME: i1 noundef [[COND:%.*]], ptr noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]]
-; CHECK:       [[TRUE]]:
-; CHECK-NEXT:    br label %[[FINISH:.*]]
-; CHECK:       [[FALSE]]:
-; CHECK-NEXT:    br label %[[FINISH]]
-; CHECK:       [[FINISH]]:
-; CHECK-NEXT:    [[PTR:%.*]] = phi ptr [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ]
-; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
-; CHECK-NEXT:    ret i32 [[VAL]]
+  %ptr = phi ptr addrspace(1) [ %ptr.true, %true ], [ %ptr.false, %false ]
+  %val = load i32, ptr addrspace(1) %ptr, align 4
+  call void @clobber(i32 %val)
+  ret void
+}
+
+define amdgpu_kernel void @test_branch_load_partial_noalias(i1 %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) %ptr.false) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_branch_load_partial_noalias(
+; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) {
+; AMDGCN-NEXT:  [[ENTRY:.*:]]
+; AMDGCN-NEXT:    br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]]
+; AMDGCN:       [[TRUE]]:
+; AMDGCN-NEXT:    call void @clobber(i32 noundef 1)
+; AMDGCN-NEXT:    br label %[[FINISH:.*]]
+; AMDGCN:       [[FALSE]]:
+; AMDGCN-NEXT:    br label %[[FINISH]]
+; AMDGCN:       [[FINISH]]:
+; AMDGCN-NEXT:    [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ]
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT:    ret void
 ;
 entry:
   br i1 %cond, label %true, label %false
 true:
+  call void @clobber(i32 1)
   br label %finish
 false:
   br label %finish
 finish:
-  %ptr = phi ptr [ %ptr.true, %true ], [ %ptr.false, %false ]
-  %val = load i32, ptr %ptr, align 4
-  ret i32 %val
+  %ptr = phi ptr addrspace(1) [ %ptr.true, %true ], [ %ptr.false, %false ]
+  %val = load i32, ptr addrspace(1) %ptr, align 4
+  call void @clobber(i32 %val)
+  ret void
 }
 ;.
-; CHECK: [[META0]] = !{}
+; AMDGCN: [[META0]] = !{}
 ;.

>From ef97544e9bc31e61c84e7d1e8b044ac3a61ca164 Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <zgoldtho at ualberta.ca>
Date: Thu, 29 May 2025 16:02:57 -0500
Subject: [PATCH 3/8] Added guards for side-effects on loads.

"Side effects" include volatile loads and atomic loads that are at least
monotonic.
---
 .../Transforms/IPO/AttributorAttributes.cpp   |  79 +++++-----
 .../Attributor/tag-invariant-loads.ll         | 139 +++++++++++-------
 2 files changed, 135 insertions(+), 83 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index dec36b3e7dcb3..b178cc5951e3d 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12542,39 +12542,44 @@ struct AAInvariantLoadPointerImpl
     : public StateWrapper<BitIntegerState<uint8_t, 7>, AAInvariantLoadPointer,
                           uint8_t> {
   // load invariance is implied by, but not equivalent to IS_NOALIAS |
-  // IS_READONLY, as load invariance is also implied by all underlying objects
+  // IS_NOEFFECT, as load invariance is also implied by all underlying objects
   // being load invariant.
   //
-  // IS_INVARIANT is set to indicate that the contents of the pointer are
-  // *known* to be invariant.
+  // IS_KNOWN_INVARIANT is set to indicate that the contents of the pointer are
+  // *known* to be invariant, and is therefore a pessimistic bit.
   enum {
-    IS_INVARIANT = 1 << 0,
+    IS_KNOWN_INVARIANT = 1 << 0,
     IS_NOALIAS = 1 << 1,
-    IS_READONLY = 1 << 2,
+    IS_NOEFFECT = 1 << 2,
+
+    IS_IMPLIED_INVARIANT = IS_NOALIAS | IS_NOEFFECT,
   };
-  static_assert(getBestState() == (IS_INVARIANT | IS_NOALIAS | IS_READONLY),
+  static_assert(getBestState() == (IS_KNOWN_INVARIANT | IS_IMPLIED_INVARIANT),
                 "Unexpected best state!");
 
   using Base = StateWrapper<BitIntegerState<uint8_t, 7>, AAInvariantLoadPointer,
                             uint8_t>;
 
-  // the BitIntegerState is optimistic about noalias and readonly, but
-  // pessimistic about invariance
+  // the BitIntegerState is optimistic about IS_NOALIAS and IS_NOEFFECT, but
+  // pessimistic about IS_KNOWN_INVARIANT
   AAInvariantLoadPointerImpl(const IRPosition &IRP, Attributor &A)
-      : Base(IRP, IS_NOALIAS | IS_READONLY) {}
+      : Base(IRP, IS_IMPLIED_INVARIANT) {}
 
   void initialize(Attributor &A) final {
-    // conservatively assume that the pointer's contents are not invariant,
-    // until proven otherwise.
-    removeAssumedBits(IS_INVARIANT);
+    removeAssumedBits(IS_KNOWN_INVARIANT);
   }
 
   bool isKnownInvariant() const final {
-    return isKnown(IS_INVARIANT) || isKnown(IS_NOALIAS | IS_READONLY);
+    return isKnown(IS_KNOWN_INVARIANT) || isKnown(IS_IMPLIED_INVARIANT);
   }
 
   bool isAssumedInvariant() const final {
-    return isAssumed(IS_INVARIANT) || isAssumed(IS_NOALIAS | IS_READONLY);
+    if (isAssumed(IS_KNOWN_INVARIANT) || isAssumed(IS_IMPLIED_INVARIANT))
+      return true;
+    // if the function is callable, optimistically assume that invariance can be
+    // inferred from the caller
+    const auto *F = getAssociatedFunction();
+    return F && isCallableCC(F->getCallingConv());
   }
 
   ChangeStatus updateImpl(Attributor &A) override {
@@ -12583,8 +12588,12 @@ struct AAInvariantLoadPointerImpl
 
     ChangeStatus Changed = ChangeStatus::UNCHANGED;
 
-    Changed |= updateNoAlias(A);
-    Changed |= updateReadOnly(A);
+    Changed |= checkNoAlias(A);
+    Changed |= checkNoEffect(A);
+
+    // try to infer invariance from underlying objects
+    const auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(
+        getIRPosition(), this, DepClassTy::REQUIRED);
 
     bool UsedAssumedInformation = false;
     const auto IsInvariantLoadIfPointer = [&](const Value &V) {
@@ -12601,16 +12610,12 @@ struct AAInvariantLoadPointerImpl
       UsedAssumedInformation = true;
       return true;
     };
-
-    const auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(
-        getIRPosition(), this, DepClassTy::REQUIRED);
-
     if (!AUO->forallUnderlyingObjects(IsInvariantLoadIfPointer))
       return indicatePessimisticFixpoint();
 
     if (!UsedAssumedInformation) {
       // pointer is known (not assumed) to be invariant
-      addKnownBits(IS_INVARIANT);
+      addKnownBits(IS_KNOWN_INVARIANT);
       return indicateOptimisticFixpoint() | Changed;
     }
 
@@ -12639,8 +12644,6 @@ struct AAInvariantLoadPointerImpl
         return true;
 
       if (auto *LI = dyn_cast<LoadInst>(I)) {
-        if (LI->isVolatile() || LI->isAtomic())
-          return true;
 
         LI->setMetadata(LLVMContext::MD_invariant_load,
                         MDNode::get(LI->getContext(), {}));
@@ -12664,8 +12667,8 @@ struct AAInvariantLoadPointerImpl
   /// See AbstractAttribute::trackStatistics().
   void trackStatistics() const override {}
 
-protected:
-  ChangeStatus updateNoAlias(Attributor &A) {
+private:
+  ChangeStatus checkNoAlias(Attributor &A) {
     if (isKnown(IS_NOALIAS) || !isAssumed(IS_NOALIAS))
       return ChangeStatus::UNCHANGED;
 
@@ -12710,8 +12713,8 @@ struct AAInvariantLoadPointerImpl
     return ChangeStatus::UNCHANGED;
   }
 
-  ChangeStatus updateReadOnly(Attributor &A) {
-    if (isKnown(IS_READONLY) || !isAssumed(IS_READONLY))
+  ChangeStatus checkNoEffect(Attributor &A) {
+    if (isKnown(IS_NOEFFECT) || !isAssumed(IS_NOEFFECT))
       return ChangeStatus::UNCHANGED;
 
     const auto *F = getAssociatedFunction();
@@ -12720,8 +12723,18 @@ struct AAInvariantLoadPointerImpl
       return ChangeStatus::UNCHANGED;
 
     if (isCallableCC(F->getCallingConv())) {
-      // readonly attribute is only useful if applicable program-wide
-      removeAssumedBits(IS_READONLY);
+      // effects cannot be tracked outside of function call;
+      // conservatively assume pointer has effectful uses
+      removeAssumedBits(IS_NOEFFECT);
+      return ChangeStatus::CHANGED;
+    }
+
+    const auto HasNoSideEffects = [](const Use &U, bool &) {
+      const auto *I = dyn_cast<LoadInst>(U.getUser());
+      return !I || !I->mayHaveSideEffects();
+    };
+    if (!A.checkForAllUses(HasNoSideEffects, *this, getAssociatedValue())) {
+      removeAssumedBits(IS_NOEFFECT);
       return ChangeStatus::CHANGED;
     }
 
@@ -12729,12 +12742,12 @@ struct AAInvariantLoadPointerImpl
     if (const auto *AMemoryBehavior = A.getOrCreateAAFor<AAMemoryBehavior>(
             getIRPosition(), this, DepClassTy::REQUIRED)) {
       if (!AMemoryBehavior->isAssumedReadOnly()) {
-        removeAssumedBits(IS_READONLY);
+        removeAssumedBits(IS_NOEFFECT);
         return ChangeStatus::CHANGED;
       }
 
       if (AMemoryBehavior->isKnownReadOnly()) {
-        addKnownBits(IS_READONLY);
+        addKnownBits(IS_NOEFFECT);
         return ChangeStatus::UNCHANGED;
       }
 
@@ -12743,13 +12756,13 @@ struct AAInvariantLoadPointerImpl
 
     if (const auto *Arg = getAssociatedArgument()) {
       if (Arg->onlyReadsMemory()) {
-        addKnownBits(IS_READONLY);
+        addKnownBits(IS_NOEFFECT);
         return ChangeStatus::UNCHANGED;
       }
 
       // readonly information is not provided, and cannot be inferred from
       // AAMemoryBehavior
-      removeAssumedBits(IS_READONLY);
+      removeAssumedBits(IS_NOEFFECT);
       return ChangeStatus::CHANGED;
     }
 
diff --git a/llvm/test/Transforms/Attributor/tag-invariant-loads.ll b/llvm/test/Transforms/Attributor/tag-invariant-loads.ll
index 02c304822bcb8..b73e6ffafbe4a 100644
--- a/llvm/test/Transforms/Attributor/tag-invariant-loads.ll
+++ b/llvm/test/Transforms/Attributor/tag-invariant-loads.ll
@@ -1,40 +1,42 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -mtriple=amdgcn-amd-hsa -passes=attributor %s -S | FileCheck %s --check-prefix=AMDGCN
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=attributor %s -S | FileCheck %s --check-prefix=AMDGCN
 
 @G = addrspace(1) global i32 zeroinitializer, align 4
-declare void @clobber(i32)
-declare ptr addrspace(1) @get_ptr()
-declare noalias ptr addrspace(1) @get_noalias_ptr()
+declare void @clobber(i32) #0
+declare ptr addrspace(1) @get_ptr() #0
+attributes #0 = { nofree norecurse nosync nounwind willreturn }
 
 define void @test_nonkernel(ptr addrspace(1) noalias %ptr) {
 ; AMDGCN-LABEL: define void @test_nonkernel(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] {
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4:[0-9]+]]
 ; AMDGCN-NEXT:    ret void
 ;
   %val = load i32, ptr addrspace(1) %ptr, align 4
+  ;; may not be !invariant.load, as the caller may modify %ptr
   call void @clobber(i32 %val)
   ret void
 }
 
 define amdgpu_kernel void @test_plain(ptr addrspace(1) %ptr) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_plain(
-; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) {
+; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1]] {
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
 ; AMDGCN-NEXT:    ret void
 ;
   %val = load i32, ptr addrspace(1) %ptr, align 4
+  ;; may not be !invariant.load, as %ptr may alias a pointer in @clobber
   call void @clobber(i32 %val)
   ret void
 }
 
 define amdgpu_kernel void @test_noalias_ptr(ptr addrspace(1) noalias %ptr) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_noalias_ptr(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1]] {
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0:![0-9]+]]
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
 ; AMDGCN-NEXT:    ret void
 ;
   %val = load i32, ptr addrspace(1) %ptr, align 4
@@ -44,13 +46,14 @@ define amdgpu_kernel void @test_noalias_ptr(ptr addrspace(1) noalias %ptr) {
 
 define amdgpu_kernel void @test_swap(ptr addrspace(1) noalias %ptr, i32 inreg %swap) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_swap(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[SWAP:%.*]]) {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[SWAP:%.*]]) #[[ATTR1]] {
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
 ; AMDGCN-NEXT:    store i32 [[SWAP]], ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
 ; AMDGCN-NEXT:    ret void
 ;
   %val = load i32, ptr addrspace(1) %ptr, align 4
+  ;; cannot be !invariant.load due to the write to %ptr
   store i32 %swap, ptr addrspace(1) %ptr, align 4
   call void @clobber(i32 %val)
   ret void
@@ -58,21 +61,22 @@ define amdgpu_kernel void @test_swap(ptr addrspace(1) noalias %ptr, i32 inreg %s
 
 define amdgpu_kernel void @test_volatile(ptr addrspace(1) noalias %ptr) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_volatile(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 [[PTR:%.*]]) {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load volatile i32, ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
 ; AMDGCN-NEXT:    ret void
 ;
   %val = load volatile i32, ptr addrspace(1) %ptr, align 4
+  ;; volatile loads cannot be !invariant.load
   call void @clobber(i32 %val)
   ret void
 }
 
-define amdgpu_kernel void @test_atomic(ptr addrspace(1) noalias %ptr) {
-; AMDGCN-LABEL: define amdgpu_kernel void @test_atomic(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) {
-; AMDGCN-NEXT:    [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] unordered, align 4
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+define amdgpu_kernel void @test_unordered(ptr addrspace(1) noalias %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_unordered(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1]] {
+; AMDGCN-NEXT:    [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] unordered, align 4, !invariant.load [[META0]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
 ; AMDGCN-NEXT:    ret void
 ;
   %val = load atomic i32, ptr addrspace(1) %ptr unordered, align 4
@@ -80,32 +84,48 @@ define amdgpu_kernel void @test_atomic(ptr addrspace(1) noalias %ptr) {
   ret void
 }
 
+define amdgpu_kernel void @test_monotonic(ptr addrspace(1) noalias %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_monotonic(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1]] {
+; AMDGCN-NEXT:    [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] monotonic, align 4
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT:    ret void
+;
+  %val = load atomic i32, ptr addrspace(1) %ptr monotonic, align 4
+  ;; atomic loads with ordering guarantees may have side effects
+  call void @clobber(i32 %val)
+  ret void
+}
+
 define amdgpu_kernel void @test_global() {
-; AMDGCN-LABEL: define amdgpu_kernel void @test_global() {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_global(
+; AMDGCN-SAME: ) #[[ATTR1]] {
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) @G, align 4
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
 ; AMDGCN-NEXT:    ret void
 ;
   %val = load i32, ptr addrspace(1) @G, align 4
+  ;; is not an !invariant.load as global variables may change
   call void @clobber(i32 %val)
   ret void
 }
 
 define internal i32 @test_internal_noalias_load(ptr addrspace(1) %ptr) {
 ; AMDGCN-LABEL: define internal i32 @test_internal_noalias_load(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR3:[0-9]+]] {
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
 ; AMDGCN-NEXT:    ret i32 [[VAL]]
 ;
   %val = load i32, ptr addrspace(1) %ptr, align 4
+  ;; is an !invariant.load due to its only caller @test_call_internal_noalias
   ret i32 %val
 }
 
 define amdgpu_kernel void @test_call_internal_noalias(ptr addrspace(1) noalias %ptr) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal_noalias(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) {
-; AMDGCN-NEXT:    [[VAL:%.*]] = call i32 @test_internal_noalias_load(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR1:[0-9]+]]
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR1]] {
+; AMDGCN-NEXT:    [[VAL:%.*]] = call i32 @test_internal_noalias_load(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR5:[0-9]+]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
 ; AMDGCN-NEXT:    ret void
 ;
   %val = call i32 @test_internal_noalias_load(ptr addrspace(1) %ptr)
@@ -115,19 +135,20 @@ define amdgpu_kernel void @test_call_internal_noalias(ptr addrspace(1) noalias %
 
 define internal i32 @test_internal_load(ptr addrspace(1) %ptr) {
 ; AMDGCN-LABEL: define internal i32 @test_internal_load(
-; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR0]] {
+; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR3]] {
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
 ; AMDGCN-NEXT:    ret i32 [[VAL]]
 ;
   %val = load i32, ptr addrspace(1) %ptr, align 4
+  ;; may not be an !invariant.load since the pointer in @test_call_internal may alias
   ret i32 %val
 }
 
 define amdgpu_kernel void @test_call_internal(ptr addrspace(1) %ptr) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal(
-; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) {
-; AMDGCN-NEXT:    [[VAL:%.*]] = call i32 @test_internal_load(ptr addrspace(1) nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR1]]
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR1]] {
+; AMDGCN-NEXT:    [[VAL:%.*]] = call i32 @test_internal_load(ptr addrspace(1) nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR5]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
 ; AMDGCN-NEXT:    ret void
 ;
   %val = call i32 @test_internal_load(ptr addrspace(1) %ptr)
@@ -135,74 +156,90 @@ define amdgpu_kernel void @test_call_internal(ptr addrspace(1) %ptr) {
   ret void
 }
 
-define amdgpu_kernel void @test_call_ptr() {
-; AMDGCN-LABEL: define amdgpu_kernel void @test_call_ptr() {
-; AMDGCN-NEXT:    [[PTR:%.*]] = call ptr addrspace(1) @get_ptr()
+define internal i32 @test_internal_written(ptr addrspace(1) %ptr) {
+; AMDGCN-LABEL: define internal i32 @test_internal_written(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR3]] {
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
-; AMDGCN-NEXT:    ret void
+; AMDGCN-NEXT:    ret i32 [[VAL]]
 ;
-  %ptr = call ptr addrspace(1) @get_ptr()
   %val = load i32, ptr addrspace(1) %ptr, align 4
+  ;; cannot be an !invariant.load because of the write in caller @test_call_internal_written
+  ret i32 %val
+}
+
+define amdgpu_kernel void @test_call_internal_written(ptr addrspace(1) noalias %ptr, i32 inreg %x) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal_written(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[X:%.*]]) #[[ATTR1]] {
+; AMDGCN-NEXT:    store i32 [[X]], ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT:    [[VAL:%.*]] = call i32 @test_internal_written(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR]]) #[[ATTR5]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT:    ret void
+;
+  store i32 %x, ptr addrspace(1) %ptr
+  %val = call i32 @test_internal_written(ptr addrspace(1) %ptr)
   call void @clobber(i32 %val)
   ret void
 }
 
-define amdgpu_kernel void @test_call_noalias_ptr() {
-; AMDGCN-LABEL: define amdgpu_kernel void @test_call_noalias_ptr() {
-; AMDGCN-NEXT:    [[PTR:%.*]] = call ptr addrspace(1) @get_noalias_ptr()
+define amdgpu_kernel void @test_call_ptr() {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_call_ptr(
+; AMDGCN-SAME: ) #[[ATTR1]] {
+; AMDGCN-NEXT:    [[PTR:%.*]] = call align 4 ptr addrspace(1) @get_ptr() #[[ATTR4]]
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
 ; AMDGCN-NEXT:    ret void
 ;
-  %ptr = call ptr addrspace(1) @get_noalias_ptr()
+  %ptr = call ptr addrspace(1) @get_ptr()
   %val = load i32, ptr addrspace(1) %ptr, align 4
+  ;; may not be an !invariant.load since %ptr may alias
   call void @clobber(i32 %val)
   ret void
 }
 
 define amdgpu_kernel void @test_selected_load(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load(
-; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) {
+; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR1]] {
 ; AMDGCN-NEXT:    [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]]
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
 ; AMDGCN-NEXT:    ret void
 ;
   %ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false
   %val = load i32, ptr addrspace(1) %ptr, align 4
+  ;; either pointer yields an !invariant.load
   call void @clobber(i32 %val)
   ret void
 }
 
 define amdgpu_kernel void @test_selected_load_partial_noalias(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) %ptr.false) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load_partial_noalias(
-; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) {
+; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR1]] {
 ; AMDGCN-NEXT:    [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]]
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
 ; AMDGCN-NEXT:    ret void
 ;
   %ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false
   %val = load i32, ptr addrspace(1) %ptr, align 4
+  ;; %ptr.false may alias, so no !invariant.load
   call void @clobber(i32 %val)
   ret void
 }
 
 define amdgpu_kernel void @test_branch_load(i1 %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_branch_load(
-; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) {
+; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR1]] {
 ; AMDGCN-NEXT:  [[ENTRY:.*:]]
 ; AMDGCN-NEXT:    br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]]
 ; AMDGCN:       [[TRUE]]:
-; AMDGCN-NEXT:    call void @clobber(i32 noundef 1)
+; AMDGCN-NEXT:    call void @clobber(i32 noundef 1) #[[ATTR4]]
 ; AMDGCN-NEXT:    br label %[[FINISH:.*]]
 ; AMDGCN:       [[FALSE]]:
 ; AMDGCN-NEXT:    br label %[[FINISH]]
 ; AMDGCN:       [[FINISH]]:
 ; AMDGCN-NEXT:    [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ]
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
 ; AMDGCN-NEXT:    ret void
 ;
 entry:
@@ -215,24 +252,25 @@ false:
 finish:
   %ptr = phi ptr addrspace(1) [ %ptr.true, %true ], [ %ptr.false, %false ]
   %val = load i32, ptr addrspace(1) %ptr, align 4
+  ;; either pointer yields an !invariant.load
   call void @clobber(i32 %val)
   ret void
 }
 
 define amdgpu_kernel void @test_branch_load_partial_noalias(i1 %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) %ptr.false) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_branch_load_partial_noalias(
-; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) {
+; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR1]] {
 ; AMDGCN-NEXT:  [[ENTRY:.*:]]
 ; AMDGCN-NEXT:    br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]]
 ; AMDGCN:       [[TRUE]]:
-; AMDGCN-NEXT:    call void @clobber(i32 noundef 1)
+; AMDGCN-NEXT:    call void @clobber(i32 noundef 1) #[[ATTR4]]
 ; AMDGCN-NEXT:    br label %[[FINISH:.*]]
 ; AMDGCN:       [[FALSE]]:
 ; AMDGCN-NEXT:    br label %[[FINISH]]
 ; AMDGCN:       [[FINISH]]:
 ; AMDGCN-NEXT:    [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ]
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
 ; AMDGCN-NEXT:    ret void
 ;
 entry:
@@ -245,6 +283,7 @@ false:
 finish:
   %ptr = phi ptr addrspace(1) [ %ptr.true, %true ], [ %ptr.false, %false ]
   %val = load i32, ptr addrspace(1) %ptr, align 4
+  ;; %ptr.false may alias, so no !invariant.load
   call void @clobber(i32 %val)
   ret void
 }

>From fe750fd8a26093fedef3c791e264f3fa1f1415df Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <zgoldtho at ualberta.ca>
Date: Mon, 2 Jun 2025 21:23:47 -0500
Subject: [PATCH 4/8] Corrected and refactored attributor logic.

---
 llvm/include/llvm/Transforms/IPO/Attributor.h |   2 +
 .../Transforms/IPO/AttributorAttributes.cpp   | 172 +++++++++++-------
 .../Attributor/tag-invariant-loads.ll         | 118 +++++++-----
 3 files changed, 179 insertions(+), 113 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 38996bb051328..55be0838d464a 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -6308,9 +6308,11 @@ struct AAInvariantLoadPointer : public AbstractAttribute {
 
   /// Return true if the pointer's contents are known to remain invariant.
   virtual bool isKnownInvariant() const = 0;
+  virtual bool isKnownLocallyInvariant() const = 0;
 
   /// Return true if the pointer's contents are assumed to remain invariant.
   virtual bool isAssumedInvariant() const = 0;
+  virtual bool isAssumedLocallyInvariant() const = 0;
 
   /// See AbstractAttribute::getName().
   StringRef getName() const override { return "AAInvariantLoadPointer"; }
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index b178cc5951e3d..cfe7611276feb 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12539,47 +12539,49 @@ struct AAIndirectCallInfoCallSite : public AAIndirectCallInfo {
 namespace {
 
 struct AAInvariantLoadPointerImpl
-    : public StateWrapper<BitIntegerState<uint8_t, 7>, AAInvariantLoadPointer,
-                          uint8_t> {
-  // load invariance is implied by, but not equivalent to IS_NOALIAS |
-  // IS_NOEFFECT, as load invariance is also implied by all underlying objects
-  // being load invariant.
-  //
-  // IS_KNOWN_INVARIANT is set to indicate that the contents of the pointer are
-  // *known* to be invariant, and is therefore a pessimistic bit.
-  enum {
-    IS_KNOWN_INVARIANT = 1 << 0,
-    IS_NOALIAS = 1 << 1,
-    IS_NOEFFECT = 1 << 2,
+    : public StateWrapper<BitIntegerState<uint8_t, 15>,
+                          AAInvariantLoadPointer> {
 
-    IS_IMPLIED_INVARIANT = IS_NOALIAS | IS_NOEFFECT,
+  enum {
+    // pointer does not alias within the bounds of the function
+    IS_NOALIAS = 1 << 0,
+    // pointer is not involved in any effectful instructions within the bounds
+    // of the function
+    IS_NOEFFECT = 1 << 1,
+    // loads are invariant within the bounds of the function
+    IS_LOCALLY_INVARIANT = 1 << 2,
+    // memory lifetime is constrained within the bounds of the function
+    IS_LOCALLY_CONSTRAINED = 1 << 3,
+
+    IS_BEST_STATE = IS_NOALIAS | IS_NOEFFECT | IS_LOCALLY_INVARIANT |
+                    IS_LOCALLY_CONSTRAINED,
   };
-  static_assert(getBestState() == (IS_KNOWN_INVARIANT | IS_IMPLIED_INVARIANT),
-                "Unexpected best state!");
+  static_assert(getBestState() == IS_BEST_STATE, "Unexpected best state");
 
-  using Base = StateWrapper<BitIntegerState<uint8_t, 7>, AAInvariantLoadPointer,
-                            uint8_t>;
+  using Base =
+      StateWrapper<BitIntegerState<uint8_t, 15>, AAInvariantLoadPointer>;
 
   // the BitIntegerState is optimistic about IS_NOALIAS and IS_NOEFFECT, but
   // pessimistic about IS_KNOWN_INVARIANT
   AAInvariantLoadPointerImpl(const IRPosition &IRP, Attributor &A)
-      : Base(IRP, IS_IMPLIED_INVARIANT) {}
-
-  void initialize(Attributor &A) final {
-    removeAssumedBits(IS_KNOWN_INVARIANT);
-  }
+      : Base(IRP) {}
 
   bool isKnownInvariant() const final {
-    return isKnown(IS_KNOWN_INVARIANT) || isKnown(IS_IMPLIED_INVARIANT);
+    return isKnownLocallyInvariant() && isKnown(IS_LOCALLY_CONSTRAINED);
+  }
+  bool isKnownLocallyInvariant() const final {
+    if (isKnown(IS_LOCALLY_INVARIANT))
+      return true;
+    return isKnown(IS_NOALIAS | IS_NOEFFECT);
   }
 
   bool isAssumedInvariant() const final {
-    if (isAssumed(IS_KNOWN_INVARIANT) || isAssumed(IS_IMPLIED_INVARIANT))
+    return isAssumedLocallyInvariant() && isAssumed(IS_LOCALLY_CONSTRAINED);
+  }
+  bool isAssumedLocallyInvariant() const final {
+    if (isAssumed(IS_LOCALLY_INVARIANT))
       return true;
-    // if the function is callable, optimistically assume that invariance can be
-    // inferred from the caller
-    const auto *F = getAssociatedFunction();
-    return F && isCallableCC(F->getCallingConv());
+    return isAssumed(IS_NOALIAS | IS_NOEFFECT);
   }
 
   ChangeStatus updateImpl(Attributor &A) override {
@@ -12589,6 +12591,9 @@ struct AAInvariantLoadPointerImpl
     ChangeStatus Changed = ChangeStatus::UNCHANGED;
 
     Changed |= checkNoAlias(A);
+    if (requiresNoAlias() && !isAssumed(IS_NOALIAS))
+      return indicatePessimisticFixpoint();
+
     Changed |= checkNoEffect(A);
 
     // try to infer invariance from underlying objects
@@ -12602,9 +12607,9 @@ struct AAInvariantLoadPointerImpl
       const auto *IsInvariantLoadPointer =
           A.getOrCreateAAFor<AAInvariantLoadPointer>(IRPosition::value(V), this,
                                                      DepClassTy::REQUIRED);
-      if (IsInvariantLoadPointer->isKnownInvariant())
+      if (IsInvariantLoadPointer->isKnownLocallyInvariant())
         return true;
-      if (!IsInvariantLoadPointer->isAssumedInvariant())
+      if (!IsInvariantLoadPointer->isAssumedLocallyInvariant())
         return false;
 
       UsedAssumedInformation = true;
@@ -12614,9 +12619,9 @@ struct AAInvariantLoadPointerImpl
       return indicatePessimisticFixpoint();
 
     if (!UsedAssumedInformation) {
-      // pointer is known (not assumed) to be invariant
-      addKnownBits(IS_KNOWN_INVARIANT);
-      return indicateOptimisticFixpoint() | Changed;
+      // pointer is known (not assumed) to be locally invariant
+      addKnownBits(IS_LOCALLY_INVARIANT);
+      return Changed;
     }
 
     return Changed;
@@ -12658,28 +12663,31 @@ struct AAInvariantLoadPointerImpl
 
   /// See AbstractAttribute::getAsStr().
   const std::string getAsStr(Attributor *) const override {
-    std::string Str;
-    raw_string_ostream OS(Str);
-    OS << "load invariant pointer: " << isKnown() << '\n';
-    return Str;
+    if (isKnownInvariant())
+      return "load-invariant pointer";
+    return "non-invariant pointer";
   }
 
   /// See AbstractAttribute::trackStatistics().
   void trackStatistics() const override {}
 
+protected:
+  /// Indicate that invariance necessarily requires the pointer to be noalias.
+  virtual bool requiresNoAlias() const { return false; }
+
 private:
+  bool isExternal() const {
+    const auto *F = getAssociatedFunction();
+    if (!F)
+      return true;
+    return isCallableCC(F->getCallingConv()) &&
+           getPositionKind() != IRP_CALL_SITE_RETURNED;
+  }
+
   ChangeStatus checkNoAlias(Attributor &A) {
     if (isKnown(IS_NOALIAS) || !isAssumed(IS_NOALIAS))
       return ChangeStatus::UNCHANGED;
 
-    const auto *F = getAssociatedFunction();
-
-    if (F && isCallableCC(F->getCallingConv())) {
-      // program-wide alias information cannot be inferred
-      removeAssumedBits(IS_NOALIAS);
-      return ChangeStatus::CHANGED;
-    }
-
     // try to use AANoAlias
     if (const auto *ANoAlias = A.getOrCreateAAFor<AANoAlias>(
             getIRPosition(), this, DepClassTy::REQUIRED)) {
@@ -12696,8 +12704,8 @@ struct AAInvariantLoadPointerImpl
       return ChangeStatus::UNCHANGED;
     }
 
-    // if the function is not callable, try to infer noalias from argument
-    // attribute, since it is applicable for the duration of the function
+    // try to infer noalias from argument attribute, since it is applicable for
+    // the duration of the function
     if (const auto *Arg = getAssociatedArgument()) {
       if (Arg->hasNoAliasAttr()) {
         addKnownBits(IS_NOALIAS);
@@ -12717,34 +12725,23 @@ struct AAInvariantLoadPointerImpl
     if (isKnown(IS_NOEFFECT) || !isAssumed(IS_NOEFFECT))
       return ChangeStatus::UNCHANGED;
 
-    const auto *F = getAssociatedFunction();
-
-    if (!F)
-      return ChangeStatus::UNCHANGED;
+    if (!getAssociatedFunction())
+      return indicatePessimisticFixpoint();
 
-    if (isCallableCC(F->getCallingConv())) {
-      // effects cannot be tracked outside of function call;
-      // conservatively assume pointer has effectful uses
-      removeAssumedBits(IS_NOEFFECT);
-      return ChangeStatus::CHANGED;
-    }
+    const auto HasNoEffectLoads = [&](const Use &U, bool &) {
+      if (const auto *LI = dyn_cast<LoadInst>(U.getUser()))
+        return !LI->mayHaveSideEffects();
 
-    const auto HasNoSideEffects = [](const Use &U, bool &) {
-      const auto *I = dyn_cast<LoadInst>(U.getUser());
-      return !I || !I->mayHaveSideEffects();
+      return true;
     };
-    if (!A.checkForAllUses(HasNoSideEffects, *this, getAssociatedValue())) {
-      removeAssumedBits(IS_NOEFFECT);
-      return ChangeStatus::CHANGED;
-    }
+    if (!A.checkForAllUses(HasNoEffectLoads, *this, getAssociatedValue()))
+      return indicatePessimisticFixpoint();
 
     // try to use AAMemoryBehavior to infer readonly attribute
     if (const auto *AMemoryBehavior = A.getOrCreateAAFor<AAMemoryBehavior>(
             getIRPosition(), this, DepClassTy::REQUIRED)) {
-      if (!AMemoryBehavior->isAssumedReadOnly()) {
-        removeAssumedBits(IS_NOEFFECT);
-        return ChangeStatus::CHANGED;
-      }
+      if (!AMemoryBehavior->isAssumedReadOnly())
+        return indicatePessimisticFixpoint();
 
       if (AMemoryBehavior->isKnownReadOnly()) {
         addKnownBits(IS_NOEFFECT);
@@ -12762,8 +12759,7 @@ struct AAInvariantLoadPointerImpl
 
       // readonly information is not provided, and cannot be inferred from
       // AAMemoryBehavior
-      removeAssumedBits(IS_NOEFFECT);
-      return ChangeStatus::CHANGED;
+      return indicatePessimisticFixpoint();
     }
 
     return ChangeStatus::UNCHANGED;
@@ -12778,17 +12774,53 @@ struct AAInvariantLoadPointerFloating final : AAInvariantLoadPointerImpl {
 struct AAInvariantLoadPointerReturned final : AAInvariantLoadPointerImpl {
   AAInvariantLoadPointerReturned(const IRPosition &IRP, Attributor &A)
       : AAInvariantLoadPointerImpl(IRP, A) {}
+
+  void initialize(Attributor &) override {
+    removeAssumedBits(IS_LOCALLY_CONSTRAINED);
+  }
 };
 
 struct AAInvariantLoadPointerCallSiteReturned final
     : AAInvariantLoadPointerImpl {
   AAInvariantLoadPointerCallSiteReturned(const IRPosition &IRP, Attributor &A)
       : AAInvariantLoadPointerImpl(IRP, A) {}
+
+  void initialize(Attributor &A) override {
+    const auto *F = getAssociatedFunction();
+    assert(F && "no associated function for return from call");
+
+    // not much we can say about opaque functions
+    if (F->isDeclaration() || F->isIntrinsic()) {
+      if (!F->onlyReadsMemory() || !F->hasNoSync()) {
+        indicatePessimisticFixpoint();
+        return;
+      }
+    }
+    AAInvariantLoadPointerImpl::initialize(A);
+  }
+
+protected:
+  virtual bool requiresNoAlias() const override { return true; }
 };
 
 struct AAInvariantLoadPointerArgument final : AAInvariantLoadPointerImpl {
   AAInvariantLoadPointerArgument(const IRPosition &IRP, Attributor &A)
       : AAInvariantLoadPointerImpl(IRP, A) {}
+
+  void initialize(Attributor &) override {
+    const auto *F = getAssociatedFunction();
+    assert(F && "no associated function to argument");
+
+    if (isCallableCC(F->getCallingConv()) && !F->hasLocalLinkage())
+      removeAssumedBits(IS_LOCALLY_CONSTRAINED);
+  }
+
+protected:
+  virtual bool requiresNoAlias() const override {
+    const auto *F = getAssociatedFunction();
+    assert(F && "no associated function to argument");
+    return !isCallableCC(F->getCallingConv());
+  }
 };
 
 struct AAInvariantLoadPointerCallSiteArgument final
diff --git a/llvm/test/Transforms/Attributor/tag-invariant-loads.ll b/llvm/test/Transforms/Attributor/tag-invariant-loads.ll
index b73e6ffafbe4a..4cbf3f8edc8c6 100644
--- a/llvm/test/Transforms/Attributor/tag-invariant-loads.ll
+++ b/llvm/test/Transforms/Attributor/tag-invariant-loads.ll
@@ -4,13 +4,16 @@
 @G = addrspace(1) global i32 zeroinitializer, align 4
 declare void @clobber(i32) #0
 declare ptr addrspace(1) @get_ptr() #0
+declare noalias ptr addrspace(1) @get_noalias_ptr() #0
+declare noalias ptr addrspace(1) @get_untouched_ptr() #1
 attributes #0 = { nofree norecurse nosync nounwind willreturn }
+attributes #1 = { nofree norecurse nosync nounwind willreturn readonly }
 
 define void @test_nonkernel(ptr addrspace(1) noalias %ptr) {
 ; AMDGCN-LABEL: define void @test_nonkernel(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4:[0-9]+]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR5:[0-9]+]]
 ; AMDGCN-NEXT:    ret void
 ;
   %val = load i32, ptr addrspace(1) %ptr, align 4
@@ -21,9 +24,9 @@ define void @test_nonkernel(ptr addrspace(1) noalias %ptr) {
 
 define amdgpu_kernel void @test_plain(ptr addrspace(1) %ptr) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_plain(
-; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1]] {
+; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] {
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR5]]
 ; AMDGCN-NEXT:    ret void
 ;
   %val = load i32, ptr addrspace(1) %ptr, align 4
@@ -34,9 +37,9 @@ define amdgpu_kernel void @test_plain(ptr addrspace(1) %ptr) {
 
 define amdgpu_kernel void @test_noalias_ptr(ptr addrspace(1) noalias %ptr) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_noalias_ptr(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1]] {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] {
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0:![0-9]+]]
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR5]]
 ; AMDGCN-NEXT:    ret void
 ;
   %val = load i32, ptr addrspace(1) %ptr, align 4
@@ -46,10 +49,10 @@ define amdgpu_kernel void @test_noalias_ptr(ptr addrspace(1) noalias %ptr) {
 
 define amdgpu_kernel void @test_swap(ptr addrspace(1) noalias %ptr, i32 inreg %swap) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_swap(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[SWAP:%.*]]) #[[ATTR1]] {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[SWAP:%.*]]) #[[ATTR2]] {
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
 ; AMDGCN-NEXT:    store i32 [[SWAP]], ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR5]]
 ; AMDGCN-NEXT:    ret void
 ;
   %val = load i32, ptr addrspace(1) %ptr, align 4
@@ -61,9 +64,9 @@ define amdgpu_kernel void @test_swap(ptr addrspace(1) noalias %ptr, i32 inreg %s
 
 define amdgpu_kernel void @test_volatile(ptr addrspace(1) noalias %ptr) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_volatile(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 [[PTR:%.*]]) #[[ATTR3:[0-9]+]] {
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load volatile i32, ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR5]]
 ; AMDGCN-NEXT:    ret void
 ;
   %val = load volatile i32, ptr addrspace(1) %ptr, align 4
@@ -74,9 +77,9 @@ define amdgpu_kernel void @test_volatile(ptr addrspace(1) noalias %ptr) {
 
 define amdgpu_kernel void @test_unordered(ptr addrspace(1) noalias %ptr) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_unordered(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1]] {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] {
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] unordered, align 4, !invariant.load [[META0]]
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR5]]
 ; AMDGCN-NEXT:    ret void
 ;
   %val = load atomic i32, ptr addrspace(1) %ptr unordered, align 4
@@ -86,9 +89,9 @@ define amdgpu_kernel void @test_unordered(ptr addrspace(1) noalias %ptr) {
 
 define amdgpu_kernel void @test_monotonic(ptr addrspace(1) noalias %ptr) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_monotonic(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1]] {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] {
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] monotonic, align 4
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR5]]
 ; AMDGCN-NEXT:    ret void
 ;
   %val = load atomic i32, ptr addrspace(1) %ptr monotonic, align 4
@@ -99,9 +102,9 @@ define amdgpu_kernel void @test_monotonic(ptr addrspace(1) noalias %ptr) {
 
 define amdgpu_kernel void @test_global() {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_global(
-; AMDGCN-SAME: ) #[[ATTR1]] {
+; AMDGCN-SAME: ) #[[ATTR2]] {
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) @G, align 4
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR5]]
 ; AMDGCN-NEXT:    ret void
 ;
   %val = load i32, ptr addrspace(1) @G, align 4
@@ -112,7 +115,7 @@ define amdgpu_kernel void @test_global() {
 
 define internal i32 @test_internal_noalias_load(ptr addrspace(1) %ptr) {
 ; AMDGCN-LABEL: define internal i32 @test_internal_noalias_load(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR3:[0-9]+]] {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR4:[0-9]+]] {
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
 ; AMDGCN-NEXT:    ret i32 [[VAL]]
 ;
@@ -123,9 +126,9 @@ define internal i32 @test_internal_noalias_load(ptr addrspace(1) %ptr) {
 
 define amdgpu_kernel void @test_call_internal_noalias(ptr addrspace(1) noalias %ptr) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal_noalias(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR1]] {
-; AMDGCN-NEXT:    [[VAL:%.*]] = call i32 @test_internal_noalias_load(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR5:[0-9]+]]
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT:    [[VAL:%.*]] = call i32 @test_internal_noalias_load(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR6:[0-9]+]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR5]]
 ; AMDGCN-NEXT:    ret void
 ;
   %val = call i32 @test_internal_noalias_load(ptr addrspace(1) %ptr)
@@ -133,9 +136,9 @@ define amdgpu_kernel void @test_call_internal_noalias(ptr addrspace(1) noalias %
   ret void
 }
 
-define internal i32 @test_internal_load(ptr addrspace(1) %ptr) {
+define internal i32 @test_internal_load(ptr addrspace(1) noalias %ptr) {
 ; AMDGCN-LABEL: define internal i32 @test_internal_load(
-; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR3]] {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR4]] {
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
 ; AMDGCN-NEXT:    ret i32 [[VAL]]
 ;
@@ -146,9 +149,9 @@ define internal i32 @test_internal_load(ptr addrspace(1) %ptr) {
 
 define amdgpu_kernel void @test_call_internal(ptr addrspace(1) %ptr) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal(
-; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR1]] {
-; AMDGCN-NEXT:    [[VAL:%.*]] = call i32 @test_internal_load(ptr addrspace(1) nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR5]]
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT:    [[VAL:%.*]] = call i32 @test_internal_load(ptr addrspace(1) nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR6]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR5]]
 ; AMDGCN-NEXT:    ret void
 ;
   %val = call i32 @test_internal_load(ptr addrspace(1) %ptr)
@@ -158,7 +161,7 @@ define amdgpu_kernel void @test_call_internal(ptr addrspace(1) %ptr) {
 
 define internal i32 @test_internal_written(ptr addrspace(1) %ptr) {
 ; AMDGCN-LABEL: define internal i32 @test_internal_written(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR3]] {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR4]] {
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
 ; AMDGCN-NEXT:    ret i32 [[VAL]]
 ;
@@ -169,24 +172,24 @@ define internal i32 @test_internal_written(ptr addrspace(1) %ptr) {
 
 define amdgpu_kernel void @test_call_internal_written(ptr addrspace(1) noalias %ptr, i32 inreg %x) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal_written(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[X:%.*]]) #[[ATTR1]] {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree captures(none) [[PTR:%.*]], i32 inreg [[X:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT:    [[VAL:%.*]] = call i32 @test_internal_written(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR6]]
 ; AMDGCN-NEXT:    store i32 [[X]], ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT:    [[VAL:%.*]] = call i32 @test_internal_written(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR]]) #[[ATTR5]]
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR5]]
 ; AMDGCN-NEXT:    ret void
 ;
-  store i32 %x, ptr addrspace(1) %ptr
   %val = call i32 @test_internal_written(ptr addrspace(1) %ptr)
+  store i32 %x, ptr addrspace(1) %ptr
   call void @clobber(i32 %val)
   ret void
 }
 
 define amdgpu_kernel void @test_call_ptr() {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_call_ptr(
-; AMDGCN-SAME: ) #[[ATTR1]] {
-; AMDGCN-NEXT:    [[PTR:%.*]] = call align 4 ptr addrspace(1) @get_ptr() #[[ATTR4]]
+; AMDGCN-SAME: ) #[[ATTR2]] {
+; AMDGCN-NEXT:    [[PTR:%.*]] = call align 4 ptr addrspace(1) @get_ptr() #[[ATTR5]]
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR5]]
 ; AMDGCN-NEXT:    ret void
 ;
   %ptr = call ptr addrspace(1) @get_ptr()
@@ -196,12 +199,41 @@ define amdgpu_kernel void @test_call_ptr() {
   ret void
 }
 
+define amdgpu_kernel void @test_call_noalias_ptr() {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_call_noalias_ptr(
+; AMDGCN-SAME: ) #[[ATTR2]] {
+; AMDGCN-NEXT:    [[PTR:%.*]] = call align 4 ptr addrspace(1) @get_noalias_ptr() #[[ATTR5]]
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR5]]
+; AMDGCN-NEXT:    ret void
+;
+  %ptr = call ptr addrspace(1) @get_noalias_ptr()
+  %val = load i32, ptr addrspace(1) %ptr, align 4
+  ;; may not be an !invariant.load since %ptr may have been written to before returning
+  call void @clobber(i32 %val)
+  ret void
+}
+
+define amdgpu_kernel void @test_call_untouched_ptr() {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_call_untouched_ptr(
+; AMDGCN-SAME: ) #[[ATTR2]] {
+; AMDGCN-NEXT:    [[PTR:%.*]] = call noalias align 4 ptr addrspace(1) @get_untouched_ptr() #[[ATTR7:[0-9]+]]
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR5]]
+; AMDGCN-NEXT:    ret void
+;
+  %ptr = call ptr addrspace(1) @get_untouched_ptr()
+  %val = load i32, ptr addrspace(1) %ptr, align 4
+  call void @clobber(i32 %val)
+  ret void
+}
+
 define amdgpu_kernel void @test_selected_load(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load(
-; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR1]] {
+; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] {
 ; AMDGCN-NEXT:    [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]]
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR5]]
 ; AMDGCN-NEXT:    ret void
 ;
   %ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false
@@ -213,10 +245,10 @@ define amdgpu_kernel void @test_selected_load(i1 inreg %cond, ptr addrspace(1) n
 
 define amdgpu_kernel void @test_selected_load_partial_noalias(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) %ptr.false) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load_partial_noalias(
-; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR1]] {
+; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] {
 ; AMDGCN-NEXT:    [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]]
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR5]]
 ; AMDGCN-NEXT:    ret void
 ;
   %ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false
@@ -228,18 +260,18 @@ define amdgpu_kernel void @test_selected_load_partial_noalias(i1 inreg %cond, pt
 
 define amdgpu_kernel void @test_branch_load(i1 %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_branch_load(
-; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR1]] {
+; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] {
 ; AMDGCN-NEXT:  [[ENTRY:.*:]]
 ; AMDGCN-NEXT:    br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]]
 ; AMDGCN:       [[TRUE]]:
-; AMDGCN-NEXT:    call void @clobber(i32 noundef 1) #[[ATTR4]]
+; AMDGCN-NEXT:    call void @clobber(i32 noundef 1) #[[ATTR5]]
 ; AMDGCN-NEXT:    br label %[[FINISH:.*]]
 ; AMDGCN:       [[FALSE]]:
 ; AMDGCN-NEXT:    br label %[[FINISH]]
 ; AMDGCN:       [[FINISH]]:
 ; AMDGCN-NEXT:    [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ]
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR5]]
 ; AMDGCN-NEXT:    ret void
 ;
 entry:
@@ -259,18 +291,18 @@ finish:
 
 define amdgpu_kernel void @test_branch_load_partial_noalias(i1 %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) %ptr.false) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_branch_load_partial_noalias(
-; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR1]] {
+; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] {
 ; AMDGCN-NEXT:  [[ENTRY:.*:]]
 ; AMDGCN-NEXT:    br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]]
 ; AMDGCN:       [[TRUE]]:
-; AMDGCN-NEXT:    call void @clobber(i32 noundef 1) #[[ATTR4]]
+; AMDGCN-NEXT:    call void @clobber(i32 noundef 1) #[[ATTR5]]
 ; AMDGCN-NEXT:    br label %[[FINISH:.*]]
 ; AMDGCN:       [[FALSE]]:
 ; AMDGCN-NEXT:    br label %[[FINISH]]
 ; AMDGCN:       [[FINISH]]:
 ; AMDGCN-NEXT:    [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ]
 ; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR5]]
 ; AMDGCN-NEXT:    ret void
 ;
 entry:

>From 2c9f8a256b359299f69dc4f036e735a1c711f958 Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <zgoldtho at ualberta.ca>
Date: Tue, 3 Jun 2025 09:12:23 -0500
Subject: [PATCH 5/8] Modified checks for unrelated but affected tests

It seems the attributor cleans up more dead instructions.
---
 .../Attributor/dereferenceable-1.ll           |  1 -
 .../Attributor/value-simplify-local-remote.ll | 22 +++++++------------
 2 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/llvm/test/Transforms/Attributor/dereferenceable-1.ll b/llvm/test/Transforms/Attributor/dereferenceable-1.ll
index 07e2d5ea15752..5bff2a2e6b208 100644
--- a/llvm/test/Transforms/Attributor/dereferenceable-1.ll
+++ b/llvm/test/Transforms/Attributor/dereferenceable-1.ll
@@ -207,7 +207,6 @@ define void @f7_1(ptr %ptr, i1 %cnd) {
 ; CHECK-LABEL: define {{[^@]+}}@f7_1
 ; CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[PTR:%.*]], i1 noundef [[CND:%.*]]) #[[ATTR2]] {
 ; CHECK-NEXT:    [[A:%.*]] = tail call i32 @unkown_f(ptr noundef nonnull align 4 dereferenceable(4) [[PTR]]) #[[ATTR1]]
-; CHECK-NEXT:    [[PTR_0:%.*]] = load i32, ptr [[PTR]], align 4
 ; CHECK-NEXT:    [[B:%.*]] = tail call i32 @unkown_f(ptr noundef nonnull align 4 dereferenceable(4) [[PTR]]) #[[ATTR1]]
 ; CHECK-NEXT:    br i1 [[CND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
 ; CHECK:       if.true:
diff --git a/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll b/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll
index 374d5ba7ff52b..4767244800d21 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll
@@ -135,7 +135,7 @@ define internal %S @foo.1(ptr %foo.this) {
 ; TUNIT-NEXT:    [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8
 ; TUNIT-NEXT:    store ptr [[FOO_THIS]], ptr [[FOO_THIS]], align 8
 ; TUNIT-NEXT:    call void @bar.2(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[FOO_THIS]]) #[[ATTR5:[0-9]+]]
-; TUNIT-NEXT:    [[FOO_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8
+; TUNIT-NEXT:    [[FOO_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8:![0-9]+]]
 ; TUNIT-NEXT:    ret [[S]] [[FOO_RET]]
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite)
@@ -145,7 +145,7 @@ define internal %S @foo.1(ptr %foo.this) {
 ; CGSCC-NEXT:    [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8
 ; CGSCC-NEXT:    store ptr [[FOO_THIS]], ptr [[FOO_THIS]], align 8
 ; CGSCC-NEXT:    call void @bar.2(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[FOO_THIS]]) #[[ATTR6]]
-; CGSCC-NEXT:    [[FOO_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8
+; CGSCC-NEXT:    [[FOO_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8:![0-9]+]]
 ; CGSCC-NEXT:    ret [[S]] [[FOO_RET]]
 ;
 entry:
@@ -234,7 +234,7 @@ define internal %S @bar.5(ptr %this) {
 ; TUNIT-NEXT:    [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8
 ; TUNIT-NEXT:    store ptr [[THIS]], ptr [[THIS]], align 8
 ; TUNIT-NEXT:    call void @baz.6(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) [[RETVAL]], ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS]]) #[[ATTR4]]
-; TUNIT-NEXT:    [[BAR_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8
+; TUNIT-NEXT:    [[BAR_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8]]
 ; TUNIT-NEXT:    ret [[S]] [[BAR_RET]]
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite)
@@ -244,7 +244,7 @@ define internal %S @bar.5(ptr %this) {
 ; CGSCC-NEXT:    [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8
 ; CGSCC-NEXT:    store ptr [[THIS]], ptr [[THIS]], align 8
 ; CGSCC-NEXT:    call void @baz.6(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS]]) #[[ATTR9:[0-9]+]]
-; CGSCC-NEXT:    [[BAR_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8
+; CGSCC-NEXT:    [[BAR_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8]]
 ; CGSCC-NEXT:    ret [[S]] [[BAR_RET]]
 ;
 entry:
@@ -286,7 +286,7 @@ define internal void @boom(ptr %this, ptr %data) {
 ; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[DATA_ADDR:%.*]] = alloca ptr, i32 0, align 8
 ; TUNIT-NEXT:    store ptr [[DATA]], ptr [[DATA_ADDR]], align 8
-; TUNIT-NEXT:    [[V:%.*]] = load ptr, ptr [[DATA_ADDR]], align 8
+; TUNIT-NEXT:    [[V:%.*]] = load ptr, ptr [[DATA_ADDR]], align 8, !invariant.load [[META8]]
 ; TUNIT-NEXT:    store ptr [[V]], ptr [[THIS]], align 8
 ; TUNIT-NEXT:    ret void
 ;
@@ -342,14 +342,6 @@ define %S.2 @t3.helper() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[S_2:%.*]], align 8
 ; CHECK-NEXT:    call void @ext1(ptr noundef nonnull align 8 dereferenceable(24) [[RETVAL]])
-; CHECK-NEXT:    [[DOTFCA_0_LOAD:%.*]] = load ptr, ptr [[RETVAL]], align 8
-; CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [[S_2]] poison, ptr [[DOTFCA_0_LOAD]], 0
-; CHECK-NEXT:    [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [[S_2]], ptr [[RETVAL]], i32 0, i32 1
-; CHECK-NEXT:    [[DOTFCA_1_LOAD:%.*]] = load i64, ptr [[DOTFCA_1_GEP]], align 8
-; CHECK-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [[S_2]] [[DOTFCA_0_INSERT]], i64 [[DOTFCA_1_LOAD]], 1
-; CHECK-NEXT:    [[DOTFCA_2_GEP:%.*]] = getelementptr inbounds [[S_2]], ptr [[RETVAL]], i32 0, i32 2
-; CHECK-NEXT:    [[DOTFCA_2_LOAD:%.*]] = load i64, ptr [[DOTFCA_2_GEP]], align 8
-; CHECK-NEXT:    [[DOTFCA_2_INSERT:%.*]] = insertvalue [[S_2]] [[DOTFCA_1_INSERT]], i64 [[DOTFCA_2_LOAD]], 2
 ; CHECK-NEXT:    ret [[S_2]] zeroinitializer
 ;
 entry:
@@ -508,7 +500,7 @@ define internal %S @t4a(ptr %this) {
 ; CGSCC-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, i32 0, align 8
 ; CGSCC-NEXT:    store ptr [[THIS]], ptr [[THIS]], align 8
 ; CGSCC-NEXT:    call void @t4b(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS]]) #[[ATTR6]]
-; CGSCC-NEXT:    [[TMP0:%.*]] = load [[S]], ptr [[RETVAL]], align 8
+; CGSCC-NEXT:    [[TMP0:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8]]
 ; CGSCC-NEXT:    ret [[S]] [[TMP0]]
 ;
 entry:
@@ -623,6 +615,7 @@ entry:
 ; TUNIT: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
 ; TUNIT: [[META6:![0-9]+]] = !{i32 7, !"Dwarf Version", i32 2}
 ; TUNIT: [[META7:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3}
+; TUNIT: [[META8]] = !{}
 ;.
 ; CGSCC: [[META0:![0-9]+]] = !{i32 2, !"SDK Version", [2 x i32] [i32 11, i32 5]}
 ; CGSCC: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
@@ -632,4 +625,5 @@ entry:
 ; CGSCC: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
 ; CGSCC: [[META6:![0-9]+]] = !{i32 7, !"Dwarf Version", i32 2}
 ; CGSCC: [[META7:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3}
+; CGSCC: [[META8]] = !{}
 ;.

>From fcbc5a27d02fe5f4ab671abaf0eada1194991815 Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <zgoldtho at ualberta.ca>
Date: Tue, 3 Jun 2025 11:01:30 -0500
Subject: [PATCH 6/8] Incorporated feedback.

---
 .../Transforms/IPO/AttributorAttributes.cpp   | 112 ++++++++++--------
 1 file changed, 64 insertions(+), 48 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index cfe7611276feb..66436262bf1f7 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12569,6 +12569,7 @@ struct AAInvariantLoadPointerImpl
   bool isKnownInvariant() const final {
     return isKnownLocallyInvariant() && isKnown(IS_LOCALLY_CONSTRAINED);
   }
+
   bool isKnownLocallyInvariant() const final {
     if (isKnown(IS_LOCALLY_INVARIANT))
       return true;
@@ -12578,6 +12579,7 @@ struct AAInvariantLoadPointerImpl
   bool isAssumedInvariant() const final {
     return isAssumedLocallyInvariant() && isAssumed(IS_LOCALLY_CONSTRAINED);
   }
+
   bool isAssumedLocallyInvariant() const final {
     if (isAssumed(IS_LOCALLY_INVARIANT))
       return true;
@@ -12585,44 +12587,15 @@ struct AAInvariantLoadPointerImpl
   }
 
   ChangeStatus updateImpl(Attributor &A) override {
-    if (isKnownInvariant())
-      return indicateOptimisticFixpoint();
-
     ChangeStatus Changed = ChangeStatus::UNCHANGED;
 
-    Changed |= checkNoAlias(A);
+    Changed |= updateNoAlias(A);
     if (requiresNoAlias() && !isAssumed(IS_NOALIAS))
       return indicatePessimisticFixpoint();
 
-    Changed |= checkNoEffect(A);
-
-    // try to infer invariance from underlying objects
-    const auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(
-        getIRPosition(), this, DepClassTy::REQUIRED);
-
-    bool UsedAssumedInformation = false;
-    const auto IsInvariantLoadIfPointer = [&](const Value &V) {
-      if (!V.getType()->isPointerTy())
-        return true;
-      const auto *IsInvariantLoadPointer =
-          A.getOrCreateAAFor<AAInvariantLoadPointer>(IRPosition::value(V), this,
-                                                     DepClassTy::REQUIRED);
-      if (IsInvariantLoadPointer->isKnownLocallyInvariant())
-        return true;
-      if (!IsInvariantLoadPointer->isAssumedLocallyInvariant())
-        return false;
-
-      UsedAssumedInformation = true;
-      return true;
-    };
-    if (!AUO->forallUnderlyingObjects(IsInvariantLoadIfPointer))
-      return indicatePessimisticFixpoint();
+    Changed |= updateNoEffect(A);
 
-    if (!UsedAssumedInformation) {
-      // pointer is known (not assumed) to be locally invariant
-      addKnownBits(IS_LOCALLY_INVARIANT);
-      return Changed;
-    }
+    Changed |= updateLocalInvariance(A);
 
     return Changed;
   }
@@ -12632,7 +12605,7 @@ struct AAInvariantLoadPointerImpl
       return ChangeStatus::UNCHANGED;
 
     ChangeStatus Changed = ChangeStatus::UNCHANGED;
-    Value *Ptr = &getAssociatedValue();
+    const Value *Ptr = &getAssociatedValue();
     const auto TagInvariantLoads = [&](const Use &U, bool &) {
       if (U.get() != Ptr)
         return true;
@@ -12649,7 +12622,6 @@ struct AAInvariantLoadPointerImpl
         return true;
 
       if (auto *LI = dyn_cast<LoadInst>(I)) {
-
         LI->setMetadata(LLVMContext::MD_invariant_load,
                         MDNode::get(LI->getContext(), {}));
         Changed = ChangeStatus::CHANGED;
@@ -12677,14 +12649,14 @@ struct AAInvariantLoadPointerImpl
 
 private:
   bool isExternal() const {
-    const auto *F = getAssociatedFunction();
+    const Function *F = getAssociatedFunction();
     if (!F)
       return true;
     return isCallableCC(F->getCallingConv()) &&
            getPositionKind() != IRP_CALL_SITE_RETURNED;
   }
 
-  ChangeStatus checkNoAlias(Attributor &A) {
+  ChangeStatus updateNoAlias(Attributor &A) {
     if (isKnown(IS_NOALIAS) || !isAssumed(IS_NOALIAS))
       return ChangeStatus::UNCHANGED;
 
@@ -12693,7 +12665,7 @@ struct AAInvariantLoadPointerImpl
             getIRPosition(), this, DepClassTy::REQUIRED)) {
       if (ANoAlias->isKnownNoAlias()) {
         addKnownBits(IS_NOALIAS);
-        return ChangeStatus::UNCHANGED;
+        return ChangeStatus::CHANGED;
       }
 
       if (!ANoAlias->isAssumedNoAlias()) {
@@ -12706,7 +12678,7 @@ struct AAInvariantLoadPointerImpl
 
     // try to infer noalias from argument attribute, since it is applicable for
     // the duration of the function
-    if (const auto *Arg = getAssociatedArgument()) {
+    if (const Argument *Arg = getAssociatedArgument()) {
       if (Arg->hasNoAliasAttr()) {
         addKnownBits(IS_NOALIAS);
         return ChangeStatus::UNCHANGED;
@@ -12721,7 +12693,7 @@ struct AAInvariantLoadPointerImpl
     return ChangeStatus::UNCHANGED;
   }
 
-  ChangeStatus checkNoEffect(Attributor &A) {
+  ChangeStatus updateNoEffect(Attributor &A) {
     if (isKnown(IS_NOEFFECT) || !isAssumed(IS_NOEFFECT))
       return ChangeStatus::UNCHANGED;
 
@@ -12729,10 +12701,8 @@ struct AAInvariantLoadPointerImpl
       return indicatePessimisticFixpoint();
 
     const auto HasNoEffectLoads = [&](const Use &U, bool &) {
-      if (const auto *LI = dyn_cast<LoadInst>(U.getUser()))
-        return !LI->mayHaveSideEffects();
-
-      return true;
+      const auto *LI = dyn_cast<LoadInst>(U.getUser());
+      return !LI || !LI->mayHaveSideEffects();
     };
     if (!A.checkForAllUses(HasNoEffectLoads, *this, getAssociatedValue()))
       return indicatePessimisticFixpoint();
@@ -12751,7 +12721,7 @@ struct AAInvariantLoadPointerImpl
       return ChangeStatus::UNCHANGED;
     }
 
-    if (const auto *Arg = getAssociatedArgument()) {
+    if (const Argument *Arg = getAssociatedArgument()) {
       if (Arg->onlyReadsMemory()) {
         addKnownBits(IS_NOEFFECT);
         return ChangeStatus::UNCHANGED;
@@ -12764,6 +12734,47 @@ struct AAInvariantLoadPointerImpl
 
     return ChangeStatus::UNCHANGED;
   }
+
+  ChangeStatus updateLocalInvariance(Attributor &A) {
+    if (isKnown(IS_LOCALLY_INVARIANT) || !isAssumed(IS_LOCALLY_INVARIANT))
+      return ChangeStatus::UNCHANGED;
+
+    // Try to infer local invariance from the pointer's underlying objects.
+    const auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(
+        getIRPosition(), this, DepClassTy::REQUIRED);
+    if (!AUO)
+      return ChangeStatus::UNCHANGED;
+
+    bool UsedAssumedInformation = false;
+    const auto IsLocallyInvariantLoadIfPointer = [&](const Value &V) {
+      if (!V.getType()->isPointerTy())
+        return true;
+      const auto *IsInvariantLoadPointer =
+          A.getOrCreateAAFor<AAInvariantLoadPointer>(IRPosition::value(V), this,
+                                                     DepClassTy::REQUIRED);
+      // Conservatively fail if no AA is available for the underlying object.
+      if (!IsInvariantLoadPointer)
+        return false;
+
+      if (IsInvariantLoadPointer->isKnownLocallyInvariant())
+        return true;
+      if (!IsInvariantLoadPointer->isAssumedLocallyInvariant())
+        return false;
+
+      UsedAssumedInformation = true;
+      return true;
+    };
+    if (!AUO->forallUnderlyingObjects(IsLocallyInvariantLoadIfPointer))
+      return indicatePessimisticFixpoint();
+
+    if (!UsedAssumedInformation) {
+      // No assumed information was used, so local invariance is known.
+      addKnownBits(IS_LOCALLY_INVARIANT);
+      return ChangeStatus::CHANGED;
+    }
+
+    return ChangeStatus::UNCHANGED;
+  }
 };
 
 struct AAInvariantLoadPointerFloating final : AAInvariantLoadPointerImpl {
@@ -12786,7 +12797,7 @@ struct AAInvariantLoadPointerCallSiteReturned final
       : AAInvariantLoadPointerImpl(IRP, A) {}
 
   void initialize(Attributor &A) override {
-    const auto *F = getAssociatedFunction();
+    const Function *F = getAssociatedFunction();
     assert(F && "no associated function for return from call");
 
     // not much we can say about opaque functions
@@ -12808,16 +12819,21 @@ struct AAInvariantLoadPointerArgument final : AAInvariantLoadPointerImpl {
       : AAInvariantLoadPointerImpl(IRP, A) {}
 
   void initialize(Attributor &) override {
-    const auto *F = getAssociatedFunction();
+    const Function *F = getAssociatedFunction();
     assert(F && "no associated function to argument");
 
-    if (isCallableCC(F->getCallingConv()) && !F->hasLocalLinkage())
+    if (!isCallableCC(F->getCallingConv())) {
+      addKnownBits(IS_LOCALLY_CONSTRAINED);
+      return;
+    }
+
+    if (!F->hasLocalLinkage())
       removeAssumedBits(IS_LOCALLY_CONSTRAINED);
   }
 
 protected:
   virtual bool requiresNoAlias() const override {
-    const auto *F = getAssociatedFunction();
+    const Function *F = getAssociatedFunction();
     assert(F && "no associated function to argument");
     return !isCallableCC(F->getCallingConv());
   }

>From 692876e8aa07ef453c6f94d268f2df59f2b5c5b7 Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <zgoldtho at ualberta.ca>
Date: Tue, 3 Jun 2025 16:51:49 -0500
Subject: [PATCH 7/8] Incorporated feedback.

---
 .../Transforms/IPO/AttributorAttributes.cpp   | 33 ++++++++++---------
 .../{ => AMDGPU}/tag-invariant-loads.ll       |  5 +--
 2 files changed, 20 insertions(+), 18 deletions(-)
 rename llvm/test/Transforms/Attributor/{ => AMDGPU}/tag-invariant-loads.ll (99%)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 66436262bf1f7..0c267bfa455fa 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12643,11 +12643,22 @@ struct AAInvariantLoadPointerImpl
   /// See AbstractAttribute::trackStatistics().
   void trackStatistics() const override {}
 
-protected:
-  /// Indicate that invariance necessarily requires the pointer to be noalias.
-  virtual bool requiresNoAlias() const { return false; }
-
 private:
+  /// Indicate that noalias is required for the pointer to be invariant.
+  bool requiresNoAlias() const {
+    switch (getPositionKind()) {
+    default:
+      return false;
+    case IRP_CALL_SITE_RETURNED:
+      return true;
+    case IRP_ARGUMENT: {
+      const Function *F = getAssociatedFunction();
+      assert(F && "no associated function for argument");
+      return !isCallableCC(F->getCallingConv());
+    }
+    }
+  }
+
   bool isExternal() const {
     const Function *F = getAssociatedFunction();
     if (!F)
@@ -12800,7 +12811,7 @@ struct AAInvariantLoadPointerCallSiteReturned final
     const Function *F = getAssociatedFunction();
     assert(F && "no associated function for return from call");
 
-    // not much we can say about opaque functions
+    // There is not much we can say about opaque functions.
     if (F->isDeclaration() || F->isIntrinsic()) {
       if (!F->onlyReadsMemory() || !F->hasNoSync()) {
         indicatePessimisticFixpoint();
@@ -12809,9 +12820,6 @@ struct AAInvariantLoadPointerCallSiteReturned final
     }
     AAInvariantLoadPointerImpl::initialize(A);
   }
-
-protected:
-  virtual bool requiresNoAlias() const override { return true; }
 };
 
 struct AAInvariantLoadPointerArgument final : AAInvariantLoadPointerImpl {
@@ -12820,7 +12828,7 @@ struct AAInvariantLoadPointerArgument final : AAInvariantLoadPointerImpl {
 
   void initialize(Attributor &) override {
     const Function *F = getAssociatedFunction();
-    assert(F && "no associated function to argument");
+    assert(F && "no associated function for argument");
 
     if (!isCallableCC(F->getCallingConv())) {
       addKnownBits(IS_LOCALLY_CONSTRAINED);
@@ -12830,13 +12838,6 @@ struct AAInvariantLoadPointerArgument final : AAInvariantLoadPointerImpl {
     if (!F->hasLocalLinkage())
       removeAssumedBits(IS_LOCALLY_CONSTRAINED);
   }
-
-protected:
-  virtual bool requiresNoAlias() const override {
-    const Function *F = getAssociatedFunction();
-    assert(F && "no associated function to argument");
-    return !isCallableCC(F->getCallingConv());
-  }
 };
 
 struct AAInvariantLoadPointerCallSiteArgument final
diff --git a/llvm/test/Transforms/Attributor/tag-invariant-loads.ll b/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll
similarity index 99%
rename from llvm/test/Transforms/Attributor/tag-invariant-loads.ll
rename to llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll
index 4cbf3f8edc8c6..3cf6759a28b53 100644
--- a/llvm/test/Transforms/Attributor/tag-invariant-loads.ll
+++ b/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll
@@ -6,8 +6,6 @@ declare void @clobber(i32) #0
 declare ptr addrspace(1) @get_ptr() #0
 declare noalias ptr addrspace(1) @get_noalias_ptr() #0
 declare noalias ptr addrspace(1) @get_untouched_ptr() #1
-attributes #0 = { nofree norecurse nosync nounwind willreturn }
-attributes #1 = { nofree norecurse nosync nounwind willreturn readonly }
 
 define void @test_nonkernel(ptr addrspace(1) noalias %ptr) {
 ; AMDGCN-LABEL: define void @test_nonkernel(
@@ -319,6 +317,9 @@ finish:
   call void @clobber(i32 %val)
   ret void
 }
+
+attributes #0 = { nofree norecurse nosync nounwind willreturn }
+attributes #1 = { nofree norecurse nosync nounwind willreturn readonly }
 ;.
 ; AMDGCN: [[META0]] = !{}
 ;.

>From ee594eaa00fdceb9345ddcec2ab2c5e1844a56b2 Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <zgoldtho at ualberta.ca>
Date: Thu, 5 Jun 2025 14:54:48 -0500
Subject: [PATCH 8/8] Made default `noalias` requirement more conservative.

---
 .../Transforms/IPO/AttributorAttributes.cpp   |  7 +++--
 .../Attributor/AMDGPU/tag-invariant-loads.ll  | 28 +++++++++++++++++++
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 0c267bfa455fa..b8516985aa4e3 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12648,9 +12648,12 @@ struct AAInvariantLoadPointerImpl
   bool requiresNoAlias() const {
     switch (getPositionKind()) {
     default:
-      return false;
-    case IRP_CALL_SITE_RETURNED:
+      // Conservatively default to require noalias.
       return true;
+    case IRP_FLOAT:
+    case IRP_RETURNED:
+    case IRP_CALL_SITE:
+      return false;
     case IRP_ARGUMENT: {
       const Function *F = getAssociatedFunction();
       assert(F && "no associated function for argument");
diff --git a/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll b/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll
index 3cf6759a28b53..699eedba02280 100644
--- a/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll
+++ b/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll
@@ -45,6 +45,34 @@ define amdgpu_kernel void @test_noalias_ptr(ptr addrspace(1) noalias %ptr) {
   ret void
 }
 
+define amdgpu_kernel void @test_gep(ptr addrspace(1) %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_gep(
+; AMDGCN-SAME: ptr addrspace(1) nofree readonly align 4 captures(none) [[PTR:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[PTR]], i32 4
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR5]]
+; AMDGCN-NEXT:    ret void
+;
+  %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
+  %val = load i32, ptr addrspace(1) %gep, align 4
+  call void @clobber(i32 %val)
+  ret void
+}
+
+define amdgpu_kernel void @test_noalias_gep(ptr addrspace(1) noalias %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_noalias_gep(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly align 4 captures(none) [[PTR:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[PTR]], i32 4
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4, !invariant.load [[META0]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR5]]
+; AMDGCN-NEXT:    ret void
+;
+  %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
+  %val = load i32, ptr addrspace(1) %gep, align 4
+  call void @clobber(i32 %val)
+  ret void
+}
+
 define amdgpu_kernel void @test_swap(ptr addrspace(1) noalias %ptr, i32 inreg %swap) {
 ; AMDGCN-LABEL: define amdgpu_kernel void @test_swap(
 ; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[SWAP:%.*]]) #[[ATTR2]] {



More information about the llvm-commits mailing list