[llvm] 25dcd23 - [IPO] Added attributor for identifying invariant loads (#141800)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 16 09:16:50 PDT 2025


Author: zGoldthorpe
Date: 2025-06-16T11:16:47-05:00
New Revision: 25dcd231bfee1120c21b102e074542c54fb7c5c2

URL: https://github.com/llvm/llvm-project/commit/25dcd231bfee1120c21b102e074542c54fb7c5c2
DIFF: https://github.com/llvm/llvm-project/commit/25dcd231bfee1120c21b102e074542c54fb7c5c2.diff

LOG: [IPO] Added attributor for identifying invariant loads (#141800)

The attributor conservatively marks pointers whose loads are eligible to
be marked as `!invariant.load`.
It does so by identifying:
1. Pointers marked `noalias` and `readonly`
2. Pointers whose underlying objects are all eligible for invariant
loads.

The attributor then manifests this attribute at non-volatile load
instructions that are either non-atomic or `unordered` atomic.

Added: 
    llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll

Modified: 
    llvm/include/llvm/Transforms/IPO/Attributor.h
    llvm/lib/Transforms/IPO/Attributor.cpp
    llvm/lib/Transforms/IPO/AttributorAttributes.cpp
    llvm/test/Transforms/Attributor/dereferenceable-1.ll
    llvm/test/Transforms/Attributor/value-simplify-local-remote.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index e6eb756df987d..f19f3292c4798 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -6335,6 +6335,47 @@ struct AAUnderlyingObjects : AbstractAttribute {
                           AA::ValueScope Scope = AA::Interprocedural) const = 0;
 };
 
+/// An abstract interface for identifying pointers from which loads can be
+/// marked invariant.
+struct AAInvariantLoadPointer : public AbstractAttribute {
+  AAInvariantLoadPointer(const IRPosition &IRP) : AbstractAttribute(IRP) {}
+
+  /// See AbstractAttribute::isValidIRPositionForInit
+  static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
+    if (!IRP.getAssociatedType()->isPointerTy())
+      return false;
+
+    return AbstractAttribute::isValidIRPositionForInit(A, IRP);
+  }
+
+  /// Create an abstract attribute view for the position \p IRP.
+  static AAInvariantLoadPointer &createForPosition(const IRPosition &IRP,
+                                                   Attributor &A);
+
+  /// Return true if the pointer's contents are known to remain invariant.
+  virtual bool isKnownInvariant() const = 0;
+  virtual bool isKnownLocallyInvariant() const = 0;
+
+  /// Return true if the pointer's contents are assumed to remain invariant.
+  virtual bool isAssumedInvariant() const = 0;
+  virtual bool isAssumedLocallyInvariant() const = 0;
+
+  /// See AbstractAttribute::getName().
+  StringRef getName() const override { return "AAInvariantLoadPointer"; }
+
+  /// See AbstractAttribute::getIdAddr().
+  const char *getIdAddr() const override { return &ID; }
+
+  /// This function should return true if the type of the \p AA is
+  /// AAInvariantLoadPointer
+  static bool classof(const AbstractAttribute *AA) {
+    return (AA->getIdAddr() == &ID);
+  }
+
+  /// Unique ID (due to the unique address).
+  static const char ID;
+};
+
 /// An abstract interface for address space information.
 struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> {
   AAAddressSpace(const IRPosition &IRP, Attributor &A)

diff  --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index dac1f7a30c370..a2548258ddaf0 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -3612,6 +3612,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
       if (SimplifyAllLoads)
         getAssumedSimplified(IRPosition::value(I), nullptr,
                              UsedAssumedInformation, AA::Intraprocedural);
+      getOrCreateAAFor<AAInvariantLoadPointer>(
+          IRPosition::value(*LI->getPointerOperand()));
       getOrCreateAAFor<AAAddressSpace>(
           IRPosition::value(*LI->getPointerOperand()));
     } else {

diff  --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 3799a696f67af..5cb8f888354bd 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -191,6 +191,7 @@ PIPE_OPERATOR(AAInterFnReachability)
 PIPE_OPERATOR(AAPointerInfo)
 PIPE_OPERATOR(AAAssumptionInfo)
 PIPE_OPERATOR(AAUnderlyingObjects)
+PIPE_OPERATOR(AAInvariantLoadPointer)
 PIPE_OPERATOR(AAAddressSpace)
 PIPE_OPERATOR(AAAllocationInfo)
 PIPE_OPERATOR(AAIndirectCallInfo)
@@ -12533,6 +12534,342 @@ struct AAIndirectCallInfoCallSite : public AAIndirectCallInfo {
 };
 } // namespace
 
+/// --------------------- Invariant Load Pointer -------------------------------
+namespace {
+
+struct AAInvariantLoadPointerImpl
+    : public StateWrapper<BitIntegerState<uint8_t, 15>,
+                          AAInvariantLoadPointer> {
+
+  enum {
+    // pointer does not alias within the bounds of the function
+    IS_NOALIAS = 1 << 0,
+    // pointer is not involved in any effectful instructions within the bounds
+    // of the function
+    IS_NOEFFECT = 1 << 1,
+    // loads are invariant within the bounds of the function
+    IS_LOCALLY_INVARIANT = 1 << 2,
+    // memory lifetime is constrained within the bounds of the function
+    IS_LOCALLY_CONSTRAINED = 1 << 3,
+
+    IS_BEST_STATE = IS_NOALIAS | IS_NOEFFECT | IS_LOCALLY_INVARIANT |
+                    IS_LOCALLY_CONSTRAINED,
+  };
+  static_assert(getBestState() == IS_BEST_STATE, "Unexpected best state");
+
+  using Base =
+      StateWrapper<BitIntegerState<uint8_t, 15>, AAInvariantLoadPointer>;
+
+  // the BitIntegerState is optimistic about IS_NOALIAS and IS_NOEFFECT, but
+  // pessimistic about IS_LOCALLY_INVARIANT
+  AAInvariantLoadPointerImpl(const IRPosition &IRP, Attributor &A)
+      : Base(IRP) {}
+
+  bool isKnownInvariant() const final {
+    return isKnownLocallyInvariant() && isKnown(IS_LOCALLY_CONSTRAINED);
+  }
+
+  bool isKnownLocallyInvariant() const final {
+    if (isKnown(IS_LOCALLY_INVARIANT))
+      return true;
+    return isKnown(IS_NOALIAS | IS_NOEFFECT);
+  }
+
+  bool isAssumedInvariant() const final {
+    return isAssumedLocallyInvariant() && isAssumed(IS_LOCALLY_CONSTRAINED);
+  }
+
+  bool isAssumedLocallyInvariant() const final {
+    if (isAssumed(IS_LOCALLY_INVARIANT))
+      return true;
+    return isAssumed(IS_NOALIAS | IS_NOEFFECT);
+  }
+
+  ChangeStatus updateImpl(Attributor &A) override {
+    ChangeStatus Changed = ChangeStatus::UNCHANGED;
+
+    Changed |= updateNoAlias(A);
+    if (requiresNoAlias() && !isAssumed(IS_NOALIAS))
+      return indicatePessimisticFixpoint();
+
+    Changed |= updateNoEffect(A);
+
+    Changed |= updateLocalInvariance(A);
+
+    return Changed;
+  }
+
+  ChangeStatus manifest(Attributor &A) override {
+    if (!isKnownInvariant())
+      return ChangeStatus::UNCHANGED;
+
+    ChangeStatus Changed = ChangeStatus::UNCHANGED;
+    const Value *Ptr = &getAssociatedValue();
+    const auto TagInvariantLoads = [&](const Use &U, bool &) {
+      if (U.get() != Ptr)
+        return true;
+      auto *I = dyn_cast<Instruction>(U.getUser());
+      if (!I)
+        return true;
+
+      // Ensure that we are only changing uses from the corresponding callgraph
+      // SCC in the case that the AA isn't run on the entire module
+      if (!A.isRunOn(I->getFunction()))
+        return true;
+
+      if (I->hasMetadata(LLVMContext::MD_invariant_load))
+        return true;
+
+      if (auto *LI = dyn_cast<LoadInst>(I)) {
+        LI->setMetadata(LLVMContext::MD_invariant_load,
+                        MDNode::get(LI->getContext(), {}));
+        Changed = ChangeStatus::CHANGED;
+      }
+      return true;
+    };
+
+    (void)A.checkForAllUses(TagInvariantLoads, *this, *Ptr);
+    return Changed;
+  }
+
+  /// See AbstractAttribute::getAsStr().
+  const std::string getAsStr(Attributor *) const override {
+    if (isKnownInvariant())
+      return "load-invariant pointer";
+    return "non-invariant pointer";
+  }
+
+  /// See AbstractAttribute::trackStatistics().
+  void trackStatistics() const override {}
+
+private:
+  /// Indicate that noalias is required for the pointer to be invariant.
+  bool requiresNoAlias() const {
+    switch (getPositionKind()) {
+    default:
+      // Conservatively default to require noalias.
+      return true;
+    case IRP_FLOAT:
+    case IRP_RETURNED:
+    case IRP_CALL_SITE:
+      return false;
+    case IRP_CALL_SITE_RETURNED: {
+      const auto &CB = cast<CallBase>(getAnchorValue());
+      return !isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
+          &CB, /*MustPreserveNullness=*/false);
+    }
+    case IRP_ARGUMENT: {
+      const Function *F = getAssociatedFunction();
+      assert(F && "no associated function for argument");
+      return !isCallableCC(F->getCallingConv());
+    }
+    }
+  }
+
+  bool isExternal() const {
+    const Function *F = getAssociatedFunction();
+    if (!F)
+      return true;
+    return isCallableCC(F->getCallingConv()) &&
+           getPositionKind() != IRP_CALL_SITE_RETURNED;
+  }
+
+  ChangeStatus updateNoAlias(Attributor &A) {
+    if (isKnown(IS_NOALIAS) || !isAssumed(IS_NOALIAS))
+      return ChangeStatus::UNCHANGED;
+
+    // Try to use AANoAlias.
+    if (const auto *ANoAlias = A.getOrCreateAAFor<AANoAlias>(
+            getIRPosition(), this, DepClassTy::REQUIRED)) {
+      if (ANoAlias->isKnownNoAlias()) {
+        addKnownBits(IS_NOALIAS);
+        return ChangeStatus::CHANGED;
+      }
+
+      if (!ANoAlias->isAssumedNoAlias()) {
+        removeAssumedBits(IS_NOALIAS);
+        return ChangeStatus::CHANGED;
+      }
+
+      return ChangeStatus::UNCHANGED;
+    }
+
+    // Try to infer noalias from argument attribute, since it is applicable for
+    // the duration of the function.
+    if (const Argument *Arg = getAssociatedArgument()) {
+      if (Arg->hasNoAliasAttr()) {
+        addKnownBits(IS_NOALIAS);
+        return ChangeStatus::UNCHANGED;
+      }
+
+      // Noalias information is not provided, and cannot be inferred,
+      // so we conservatively assume the pointer aliases.
+      removeAssumedBits(IS_NOALIAS);
+      return ChangeStatus::CHANGED;
+    }
+
+    return ChangeStatus::UNCHANGED;
+  }
+
+  ChangeStatus updateNoEffect(Attributor &A) {
+    if (isKnown(IS_NOEFFECT) || !isAssumed(IS_NOEFFECT))
+      return ChangeStatus::UNCHANGED;
+
+    if (!getAssociatedFunction())
+      return indicatePessimisticFixpoint();
+
+    const auto HasNoEffectLoads = [&](const Use &U, bool &) {
+      const auto *LI = dyn_cast<LoadInst>(U.getUser());
+      return !LI || !LI->mayHaveSideEffects();
+    };
+    if (!A.checkForAllUses(HasNoEffectLoads, *this, getAssociatedValue()))
+      return indicatePessimisticFixpoint();
+
+    // Try to use AAMemoryBehavior to infer readonly attribute.
+    if (const auto *AMemoryBehavior = A.getOrCreateAAFor<AAMemoryBehavior>(
+            getIRPosition(), this, DepClassTy::REQUIRED)) {
+      if (!AMemoryBehavior->isAssumedReadOnly())
+        return indicatePessimisticFixpoint();
+
+      if (AMemoryBehavior->isKnownReadOnly()) {
+        addKnownBits(IS_NOEFFECT);
+        return ChangeStatus::UNCHANGED;
+      }
+
+      return ChangeStatus::UNCHANGED;
+    }
+
+    if (const Argument *Arg = getAssociatedArgument()) {
+      if (Arg->onlyReadsMemory()) {
+        addKnownBits(IS_NOEFFECT);
+        return ChangeStatus::UNCHANGED;
+      }
+
+      // Readonly information is not provided, and cannot be inferred from
+      // AAMemoryBehavior.
+      return indicatePessimisticFixpoint();
+    }
+
+    return ChangeStatus::UNCHANGED;
+  }
+
+  ChangeStatus updateLocalInvariance(Attributor &A) {
+    if (isKnown(IS_LOCALLY_INVARIANT) || !isAssumed(IS_LOCALLY_INVARIANT))
+      return ChangeStatus::UNCHANGED;
+
+    // Try to infer invariance from underlying objects.
+    const auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(
+        getIRPosition(), this, DepClassTy::REQUIRED);
+    if (!AUO)
+      return ChangeStatus::UNCHANGED;
+
+    bool UsedAssumedInformation = false;
+    const auto IsLocallyInvariantLoadIfPointer = [&](const Value &V) {
+      if (!V.getType()->isPointerTy())
+        return true;
+      const auto *IsInvariantLoadPointer =
+          A.getOrCreateAAFor<AAInvariantLoadPointer>(IRPosition::value(V), this,
+                                                     DepClassTy::REQUIRED);
+      // Conservatively fail if invariance cannot be inferred.
+      if (!IsInvariantLoadPointer)
+        return false;
+
+      if (IsInvariantLoadPointer->isKnownLocallyInvariant())
+        return true;
+      if (!IsInvariantLoadPointer->isAssumedLocallyInvariant())
+        return false;
+
+      UsedAssumedInformation = true;
+      return true;
+    };
+    if (!AUO->forallUnderlyingObjects(IsLocallyInvariantLoadIfPointer))
+      return indicatePessimisticFixpoint();
+
+    if (const auto *CB = dyn_cast<CallBase>(&getAnchorValue())) {
+      if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
+              CB, /*MustPreserveNullness=*/false)) {
+        for (const Value *Arg : CB->args()) {
+          if (!IsLocallyInvariantLoadIfPointer(*Arg))
+            return indicatePessimisticFixpoint();
+        }
+      }
+    }
+
+    if (!UsedAssumedInformation) {
+      // Pointer is known and not just assumed to be locally invariant.
+      addKnownBits(IS_LOCALLY_INVARIANT);
+      return ChangeStatus::CHANGED;
+    }
+
+    return ChangeStatus::UNCHANGED;
+  }
+};
+
+struct AAInvariantLoadPointerFloating final : AAInvariantLoadPointerImpl {
+  AAInvariantLoadPointerFloating(const IRPosition &IRP, Attributor &A)
+      : AAInvariantLoadPointerImpl(IRP, A) {}
+};
+
+struct AAInvariantLoadPointerReturned final : AAInvariantLoadPointerImpl {
+  AAInvariantLoadPointerReturned(const IRPosition &IRP, Attributor &A)
+      : AAInvariantLoadPointerImpl(IRP, A) {}
+
+  void initialize(Attributor &) override {
+    removeAssumedBits(IS_LOCALLY_CONSTRAINED);
+  }
+};
+
+struct AAInvariantLoadPointerCallSiteReturned final
+    : AAInvariantLoadPointerImpl {
+  AAInvariantLoadPointerCallSiteReturned(const IRPosition &IRP, Attributor &A)
+      : AAInvariantLoadPointerImpl(IRP, A) {}
+
+  void initialize(Attributor &A) override {
+    const Function *F = getAssociatedFunction();
+    assert(F && "no associated function for return from call");
+
+    if (!F->isDeclaration() && !F->isIntrinsic())
+      return AAInvariantLoadPointerImpl::initialize(A);
+
+    const auto &CB = cast<CallBase>(getAnchorValue());
+    if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
+            &CB, /*MustPreserveNullness=*/false))
+      return AAInvariantLoadPointerImpl::initialize(A);
+
+    if (F->onlyReadsMemory() && F->hasNoSync())
+      return AAInvariantLoadPointerImpl::initialize(A);
+
+    // At this point, the function is opaque, so we conservatively assume
+    // non-invariance.
+    indicatePessimisticFixpoint();
+  }
+};
+
+struct AAInvariantLoadPointerArgument final : AAInvariantLoadPointerImpl {
+  AAInvariantLoadPointerArgument(const IRPosition &IRP, Attributor &A)
+      : AAInvariantLoadPointerImpl(IRP, A) {}
+
+  void initialize(Attributor &) override {
+    const Function *F = getAssociatedFunction();
+    assert(F && "no associated function for argument");
+
+    if (!isCallableCC(F->getCallingConv())) {
+      addKnownBits(IS_LOCALLY_CONSTRAINED);
+      return;
+    }
+
+    if (!F->hasLocalLinkage())
+      removeAssumedBits(IS_LOCALLY_CONSTRAINED);
+  }
+};
+
+struct AAInvariantLoadPointerCallSiteArgument final
+    : AAInvariantLoadPointerImpl {
+  AAInvariantLoadPointerCallSiteArgument(const IRPosition &IRP, Attributor &A)
+      : AAInvariantLoadPointerImpl(IRP, A) {}
+};
+} // namespace
+
 /// ------------------------ Address Space  ------------------------------------
 namespace {
 
@@ -13038,6 +13375,7 @@ const char AAInterFnReachability::ID = 0;
 const char AAPointerInfo::ID = 0;
 const char AAAssumptionInfo::ID = 0;
 const char AAUnderlyingObjects::ID = 0;
+const char AAInvariantLoadPointer::ID = 0;
 const char AAAddressSpace::ID = 0;
 const char AAAllocationInfo::ID = 0;
 const char AAIndirectCallInfo::ID = 0;
@@ -13172,6 +13510,7 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialValues)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFPClass)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPointerInfo)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAInvariantLoadPointer)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAddressSpace)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAllocationInfo)
 

diff  --git a/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll b/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll
new file mode 100644
index 0000000000000..ace68a19bf41f
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll
@@ -0,0 +1,382 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=attributor %s -S | FileCheck %s --check-prefix=AMDGCN
+
+ at G = addrspace(1) global i32 zeroinitializer, align 4
+declare void @clobber(i32) #0
+declare ptr addrspace(1) @get_ptr() #0
+declare noalias ptr addrspace(1) @get_noalias_ptr() #0
+declare noalias ptr addrspace(1) @get_untouched_ptr() #1
+
+define void @test_nonkernel(ptr addrspace(1) noalias %ptr) {
+; AMDGCN-LABEL: define void @test_nonkernel(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR6:[0-9]+]]
+; AMDGCN-NEXT:    ret void
+;
+  %val = load i32, ptr addrspace(1) %ptr, align 4
+  ;; may not be !invariant.load, as the caller may modify %ptr
+  call void @clobber(i32 %val)
+  ret void
+}
+
+define amdgpu_kernel void @test_plain(ptr addrspace(1) %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_plain(
+; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR6]]
+; AMDGCN-NEXT:    ret void
+;
+  %val = load i32, ptr addrspace(1) %ptr, align 4
+  ;; may not be !invariant.load, as %ptr may alias a pointer in @clobber
+  call void @clobber(i32 %val)
+  ret void
+}
+
+define amdgpu_kernel void @test_noalias_ptr(ptr addrspace(1) noalias %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_noalias_ptr(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0:![0-9]+]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR6]]
+; AMDGCN-NEXT:    ret void
+;
+  %val = load i32, ptr addrspace(1) %ptr, align 4
+  call void @clobber(i32 %val)
+  ret void
+}
+
+define amdgpu_kernel void @test_gep(ptr addrspace(1) %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_gep(
+; AMDGCN-SAME: ptr addrspace(1) nofree readonly align 4 captures(none) [[PTR:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[PTR]], i32 4
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR6]]
+; AMDGCN-NEXT:    ret void
+;
+  %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
+  %val = load i32, ptr addrspace(1) %gep, align 4
+  call void @clobber(i32 %val)
+  ret void
+}
+
+define amdgpu_kernel void @test_noalias_gep(ptr addrspace(1) noalias %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_noalias_gep(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly align 4 captures(none) [[PTR:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[PTR]], i32 4
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4, !invariant.load [[META0]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR6]]
+; AMDGCN-NEXT:    ret void
+;
+  %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
+  %val = load i32, ptr addrspace(1) %gep, align 4
+  call void @clobber(i32 %val)
+  ret void
+}
+
+define amdgpu_kernel void @test_swap(ptr addrspace(1) noalias %ptr, i32 inreg %swap) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_swap(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[SWAP:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT:    store i32 [[SWAP]], ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR6]]
+; AMDGCN-NEXT:    ret void
+;
+  %val = load i32, ptr addrspace(1) %ptr, align 4
+  ;; cannot be !invariant.load due to the write to %ptr
+  store i32 %swap, ptr addrspace(1) %ptr, align 4
+  call void @clobber(i32 %val)
+  ret void
+}
+
+define amdgpu_kernel void @test_volatile(ptr addrspace(1) noalias %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_volatile(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 [[PTR:%.*]]) #[[ATTR3:[0-9]+]] {
+; AMDGCN-NEXT:    [[VAL:%.*]] = load volatile i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR6]]
+; AMDGCN-NEXT:    ret void
+;
+  %val = load volatile i32, ptr addrspace(1) %ptr, align 4
+  ;; volatile loads cannot be !invariant.load
+  call void @clobber(i32 %val)
+  ret void
+}
+
+define amdgpu_kernel void @test_unordered(ptr addrspace(1) noalias %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_unordered(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT:    [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] unordered, align 4, !invariant.load [[META0]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR6]]
+; AMDGCN-NEXT:    ret void
+;
+  %val = load atomic i32, ptr addrspace(1) %ptr unordered, align 4
+  call void @clobber(i32 %val)
+  ret void
+}
+
+define amdgpu_kernel void @test_monotonic(ptr addrspace(1) noalias %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_monotonic(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT:    [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] monotonic, align 4
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR6]]
+; AMDGCN-NEXT:    ret void
+;
+  %val = load atomic i32, ptr addrspace(1) %ptr monotonic, align 4
+  ;; atomic loads with ordering guarantees may have side effects
+  call void @clobber(i32 %val)
+  ret void
+}
+
+define amdgpu_kernel void @test_global() {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_global(
+; AMDGCN-SAME: ) #[[ATTR2]] {
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) @G, align 4
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR6]]
+; AMDGCN-NEXT:    ret void
+;
+  %val = load i32, ptr addrspace(1) @G, align 4
+  ;; is not an !invariant.load as global variables may change
+  call void @clobber(i32 %val)
+  ret void
+}
+
+define internal i32 @test_internal_noalias_load(ptr addrspace(1) %ptr) {
+; AMDGCN-LABEL: define internal i32 @test_internal_noalias_load(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR4:[0-9]+]] {
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
+; AMDGCN-NEXT:    ret i32 [[VAL]]
+;
+  %val = load i32, ptr addrspace(1) %ptr, align 4
+  ;; is an !invariant.load due to its only caller @test_call_internal_noalias
+  ret i32 %val
+}
+
+define amdgpu_kernel void @test_call_internal_noalias(ptr addrspace(1) noalias %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal_noalias(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT:    [[VAL:%.*]] = call i32 @test_internal_noalias_load(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR7:[0-9]+]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR6]]
+; AMDGCN-NEXT:    ret void
+;
+  %val = call i32 @test_internal_noalias_load(ptr addrspace(1) %ptr)
+  call void @clobber(i32 %val)
+  ret void
+}
+
+define internal i32 @test_internal_load(ptr addrspace(1) noalias %ptr) {
+; AMDGCN-LABEL: define internal i32 @test_internal_load(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR4]] {
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT:    ret i32 [[VAL]]
+;
+  %val = load i32, ptr addrspace(1) %ptr, align 4
+  ;; may not be an !invariant.load since the pointer in @test_call_internal may alias
+  ret i32 %val
+}
+
+define amdgpu_kernel void @test_call_internal(ptr addrspace(1) %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal(
+; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT:    [[VAL:%.*]] = call i32 @test_internal_load(ptr addrspace(1) nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR7]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR6]]
+; AMDGCN-NEXT:    ret void
+;
+  %val = call i32 @test_internal_load(ptr addrspace(1) %ptr)
+  call void @clobber(i32 %val)
+  ret void
+}
+
+define internal i32 @test_internal_written(ptr addrspace(1) %ptr) {
+; AMDGCN-LABEL: define internal i32 @test_internal_written(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR4]] {
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT:    ret i32 [[VAL]]
+;
+  %val = load i32, ptr addrspace(1) %ptr, align 4
+  ;; cannot be an !invariant.load because of the write in caller @test_call_internal_written
+  ret i32 %val
+}
+
+define amdgpu_kernel void @test_call_internal_written(ptr addrspace(1) noalias %ptr, i32 inreg %x) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal_written(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree captures(none) [[PTR:%.*]], i32 inreg [[X:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT:    [[VAL:%.*]] = call i32 @test_internal_written(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR7]]
+; AMDGCN-NEXT:    store i32 [[X]], ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR6]]
+; AMDGCN-NEXT:    ret void
+;
+  %val = call i32 @test_internal_written(ptr addrspace(1) %ptr)
+  store i32 %x, ptr addrspace(1) %ptr
+  call void @clobber(i32 %val)
+  ret void
+}
+
+define amdgpu_kernel void @test_call_ptr() {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_call_ptr(
+; AMDGCN-SAME: ) #[[ATTR2]] {
+; AMDGCN-NEXT:    [[PTR:%.*]] = call align 4 ptr addrspace(1) @get_ptr() #[[ATTR6]]
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR6]]
+; AMDGCN-NEXT:    ret void
+;
+  %ptr = call ptr addrspace(1) @get_ptr()
+  %val = load i32, ptr addrspace(1) %ptr, align 4
+  ;; may not be an !invariant.load since %ptr may alias
+  call void @clobber(i32 %val)
+  ret void
+}
+
+define amdgpu_kernel void @test_call_noalias_ptr() {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_call_noalias_ptr(
+; AMDGCN-SAME: ) #[[ATTR2]] {
+; AMDGCN-NEXT:    [[PTR:%.*]] = call align 4 ptr addrspace(1) @get_noalias_ptr() #[[ATTR6]]
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR6]]
+; AMDGCN-NEXT:    ret void
+;
+  %ptr = call ptr addrspace(1) @get_noalias_ptr()
+  %val = load i32, ptr addrspace(1) %ptr, align 4
+  ;; may not be an !invariant.load since %ptr may have been written to before returning
+  call void @clobber(i32 %val)
+  ret void
+}
+
+define amdgpu_kernel void @test_call_untouched_ptr() {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_call_untouched_ptr(
+; AMDGCN-SAME: ) #[[ATTR2]] {
+; AMDGCN-NEXT:    [[PTR:%.*]] = call noalias align 4 ptr addrspace(1) @get_untouched_ptr() #[[ATTR8:[0-9]+]]
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR6]]
+; AMDGCN-NEXT:    ret void
+;
+  %ptr = call ptr addrspace(1) @get_untouched_ptr()
+  %val = load i32, ptr addrspace(1) %ptr, align 4
+  call void @clobber(i32 %val)
+  ret void
+}
+
+define amdgpu_kernel void @test_make_buffer(ptr addrspace(1) %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_make_buffer(
+; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT:    [[RSRC:%.*]] = call align 4 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[PTR]], i16 noundef 0, i32 noundef 0, i32 noundef 0) #[[ATTR9:[0-9]+]]
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(7) [[RSRC]], align 4
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR6]]
+; AMDGCN-NEXT:    ret void
+;
+  %rsrc = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %ptr, i16 0, i32 0, i32 0)
+  %val = load i32, ptr addrspace(7) %rsrc, align 4
+  ;; original %ptr may alias
+  call void @clobber(i32 %val)
+  ret void
+}
+
+define amdgpu_kernel void @test_make_buffer_noalias(ptr addrspace(1) noalias %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_make_buffer_noalias(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT:    [[RSRC:%.*]] = call align 4 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[PTR]], i16 noundef 0, i32 noundef 0, i32 noundef 0) #[[ATTR9]]
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(7) [[RSRC]], align 4, !invariant.load [[META0]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR6]]
+; AMDGCN-NEXT:    ret void
+;
+  %rsrc = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %ptr, i16 0, i32 0, i32 0)
+  %val = load i32, ptr addrspace(7) %rsrc, align 4
+  call void @clobber(i32 %val)
+  ret void
+}
+
+define amdgpu_kernel void @test_selected_load(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load(
+; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT:    [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]]
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR6]]
+; AMDGCN-NEXT:    ret void
+;
+  %ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false
+  %val = load i32, ptr addrspace(1) %ptr, align 4
+  ;; either pointer yields an !invariant.load
+  call void @clobber(i32 %val)
+  ret void
+}
+
+define amdgpu_kernel void @test_selected_load_partial_noalias(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) %ptr.false) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load_partial_noalias(
+; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT:    [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]]
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR6]]
+; AMDGCN-NEXT:    ret void
+;
+  %ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false
+  %val = load i32, ptr addrspace(1) %ptr, align 4
+  ;; %ptr.false is not noalias and may alias, so the selected load gets no !invariant.load
+  call void @clobber(i32 %val)
+  ret void
+}
+
+define amdgpu_kernel void @test_branch_load(i1 %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_branch_load(
+; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT:  [[ENTRY:.*:]]
+; AMDGCN-NEXT:    br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]]
+; AMDGCN:       [[TRUE]]:
+; AMDGCN-NEXT:    call void @clobber(i32 noundef 1) #[[ATTR6]]
+; AMDGCN-NEXT:    br label %[[FINISH:.*]]
+; AMDGCN:       [[FALSE]]:
+; AMDGCN-NEXT:    br label %[[FINISH]]
+; AMDGCN:       [[FINISH]]:
+; AMDGCN-NEXT:    [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ]
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR6]]
+; AMDGCN-NEXT:    ret void
+;
+entry:
+  br i1 %cond, label %true, label %false
+true:
+  call void @clobber(i32 1)
+  br label %finish
+false:
+  br label %finish
+finish:
+  %ptr = phi ptr addrspace(1) [ %ptr.true, %true ], [ %ptr.false, %false ]
+  %val = load i32, ptr addrspace(1) %ptr, align 4
+  ;; both phi inputs are noalias, so either pointer yields an !invariant.load
+  call void @clobber(i32 %val)
+  ret void
+}
+
+define amdgpu_kernel void @test_branch_load_partial_noalias(i1 %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) %ptr.false) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_branch_load_partial_noalias(
+; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT:  [[ENTRY:.*:]]
+; AMDGCN-NEXT:    br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]]
+; AMDGCN:       [[TRUE]]:
+; AMDGCN-NEXT:    call void @clobber(i32 noundef 1) #[[ATTR6]]
+; AMDGCN-NEXT:    br label %[[FINISH:.*]]
+; AMDGCN:       [[FALSE]]:
+; AMDGCN-NEXT:    br label %[[FINISH]]
+; AMDGCN:       [[FINISH]]:
+; AMDGCN-NEXT:    [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ]
+; AMDGCN-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT:    call void @clobber(i32 [[VAL]]) #[[ATTR6]]
+; AMDGCN-NEXT:    ret void
+;
+entry:
+  br i1 %cond, label %true, label %false
+true:
+  call void @clobber(i32 1)
+  br label %finish
+false:
+  br label %finish
+finish:
+  %ptr = phi ptr addrspace(1) [ %ptr.true, %true ], [ %ptr.false, %false ]
+  %val = load i32, ptr addrspace(1) %ptr, align 4
+  ;; %ptr.false is not noalias and may alias, so the load through the phi gets no !invariant.load
+  call void @clobber(i32 %val)
+  ret void
+}
+
+attributes #0 = { nofree norecurse nosync nounwind willreturn }
+attributes #1 = { nofree norecurse nosync nounwind willreturn readonly }
+;.
+; AMDGCN: [[META0]] = !{}
+;.

diff  --git a/llvm/test/Transforms/Attributor/dereferenceable-1.ll b/llvm/test/Transforms/Attributor/dereferenceable-1.ll
index 07e2d5ea15752..5bff2a2e6b208 100644
--- a/llvm/test/Transforms/Attributor/dereferenceable-1.ll
+++ b/llvm/test/Transforms/Attributor/dereferenceable-1.ll
@@ -207,7 +207,6 @@ define void @f7_1(ptr %ptr, i1 %cnd) {
 ; CHECK-LABEL: define {{[^@]+}}@f7_1
 ; CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[PTR:%.*]], i1 noundef [[CND:%.*]]) #[[ATTR2]] {
 ; CHECK-NEXT:    [[A:%.*]] = tail call i32 @unkown_f(ptr noundef nonnull align 4 dereferenceable(4) [[PTR]]) #[[ATTR1]]
-; CHECK-NEXT:    [[PTR_0:%.*]] = load i32, ptr [[PTR]], align 4
 ; CHECK-NEXT:    [[B:%.*]] = tail call i32 @unkown_f(ptr noundef nonnull align 4 dereferenceable(4) [[PTR]]) #[[ATTR1]]
 ; CHECK-NEXT:    br i1 [[CND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
 ; CHECK:       if.true:

diff  --git a/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll b/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll
index 374d5ba7ff52b..4767244800d21 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll
@@ -135,7 +135,7 @@ define internal %S @foo.1(ptr %foo.this) {
 ; TUNIT-NEXT:    [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8
 ; TUNIT-NEXT:    store ptr [[FOO_THIS]], ptr [[FOO_THIS]], align 8
 ; TUNIT-NEXT:    call void @bar.2(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[FOO_THIS]]) #[[ATTR5:[0-9]+]]
-; TUNIT-NEXT:    [[FOO_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8
+; TUNIT-NEXT:    [[FOO_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8:![0-9]+]]
 ; TUNIT-NEXT:    ret [[S]] [[FOO_RET]]
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite)
@@ -145,7 +145,7 @@ define internal %S @foo.1(ptr %foo.this) {
 ; CGSCC-NEXT:    [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8
 ; CGSCC-NEXT:    store ptr [[FOO_THIS]], ptr [[FOO_THIS]], align 8
 ; CGSCC-NEXT:    call void @bar.2(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[FOO_THIS]]) #[[ATTR6]]
-; CGSCC-NEXT:    [[FOO_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8
+; CGSCC-NEXT:    [[FOO_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8:![0-9]+]]
 ; CGSCC-NEXT:    ret [[S]] [[FOO_RET]]
 ;
 entry:
@@ -234,7 +234,7 @@ define internal %S @bar.5(ptr %this) {
 ; TUNIT-NEXT:    [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8
 ; TUNIT-NEXT:    store ptr [[THIS]], ptr [[THIS]], align 8
 ; TUNIT-NEXT:    call void @baz.6(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) [[RETVAL]], ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS]]) #[[ATTR4]]
-; TUNIT-NEXT:    [[BAR_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8
+; TUNIT-NEXT:    [[BAR_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8]]
 ; TUNIT-NEXT:    ret [[S]] [[BAR_RET]]
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite)
@@ -244,7 +244,7 @@ define internal %S @bar.5(ptr %this) {
 ; CGSCC-NEXT:    [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8
 ; CGSCC-NEXT:    store ptr [[THIS]], ptr [[THIS]], align 8
 ; CGSCC-NEXT:    call void @baz.6(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS]]) #[[ATTR9:[0-9]+]]
-; CGSCC-NEXT:    [[BAR_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8
+; CGSCC-NEXT:    [[BAR_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8]]
 ; CGSCC-NEXT:    ret [[S]] [[BAR_RET]]
 ;
 entry:
@@ -286,7 +286,7 @@ define internal void @boom(ptr %this, ptr %data) {
 ; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[DATA_ADDR:%.*]] = alloca ptr, i32 0, align 8
 ; TUNIT-NEXT:    store ptr [[DATA]], ptr [[DATA_ADDR]], align 8
-; TUNIT-NEXT:    [[V:%.*]] = load ptr, ptr [[DATA_ADDR]], align 8
+; TUNIT-NEXT:    [[V:%.*]] = load ptr, ptr [[DATA_ADDR]], align 8, !invariant.load [[META8]]
 ; TUNIT-NEXT:    store ptr [[V]], ptr [[THIS]], align 8
 ; TUNIT-NEXT:    ret void
 ;
@@ -342,14 +342,6 @@ define %S.2 @t3.helper() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[S_2:%.*]], align 8
 ; CHECK-NEXT:    call void @ext1(ptr noundef nonnull align 8 dereferenceable(24) [[RETVAL]])
-; CHECK-NEXT:    [[DOTFCA_0_LOAD:%.*]] = load ptr, ptr [[RETVAL]], align 8
-; CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [[S_2]] poison, ptr [[DOTFCA_0_LOAD]], 0
-; CHECK-NEXT:    [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [[S_2]], ptr [[RETVAL]], i32 0, i32 1
-; CHECK-NEXT:    [[DOTFCA_1_LOAD:%.*]] = load i64, ptr [[DOTFCA_1_GEP]], align 8
-; CHECK-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [[S_2]] [[DOTFCA_0_INSERT]], i64 [[DOTFCA_1_LOAD]], 1
-; CHECK-NEXT:    [[DOTFCA_2_GEP:%.*]] = getelementptr inbounds [[S_2]], ptr [[RETVAL]], i32 0, i32 2
-; CHECK-NEXT:    [[DOTFCA_2_LOAD:%.*]] = load i64, ptr [[DOTFCA_2_GEP]], align 8
-; CHECK-NEXT:    [[DOTFCA_2_INSERT:%.*]] = insertvalue [[S_2]] [[DOTFCA_1_INSERT]], i64 [[DOTFCA_2_LOAD]], 2
 ; CHECK-NEXT:    ret [[S_2]] zeroinitializer
 ;
 entry:
@@ -508,7 +500,7 @@ define internal %S @t4a(ptr %this) {
 ; CGSCC-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, i32 0, align 8
 ; CGSCC-NEXT:    store ptr [[THIS]], ptr [[THIS]], align 8
 ; CGSCC-NEXT:    call void @t4b(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS]]) #[[ATTR6]]
-; CGSCC-NEXT:    [[TMP0:%.*]] = load [[S]], ptr [[RETVAL]], align 8
+; CGSCC-NEXT:    [[TMP0:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8]]
 ; CGSCC-NEXT:    ret [[S]] [[TMP0]]
 ;
 entry:
@@ -623,6 +615,7 @@ entry:
 ; TUNIT: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
 ; TUNIT: [[META6:![0-9]+]] = !{i32 7, !"Dwarf Version", i32 2}
 ; TUNIT: [[META7:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3}
+; TUNIT: [[META8]] = !{}
 ;.
 ; CGSCC: [[META0:![0-9]+]] = !{i32 2, !"SDK Version", [2 x i32] [i32 11, i32 5]}
 ; CGSCC: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
@@ -632,4 +625,5 @@ entry:
 ; CGSCC: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
 ; CGSCC: [[META6:![0-9]+]] = !{i32 7, !"Dwarf Version", i32 2}
 ; CGSCC: [[META7:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3}
+; CGSCC: [[META8]] = !{}
 ;.


        


More information about the llvm-commits mailing list