[llvm] [Reland][IPO] Added attributor for identifying invariant loads (PR #146584)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 1 11:02:14 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: None (zGoldthorpe)
Changes:
Patched and tested the `AAInvariantLoadPointer` attributor from #141800, which identifies pointers whose loads are eligible to be marked as `!invariant.load`.
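For context, tagging a load this way just attaches an empty `!invariant.load` metadata node; per the LangRef, the tagged location must then hold the same value at every point in the program where it is dereferenceable. A minimal sketch (the function name is hypothetical, not from the patch):

```llvm
define i32 @invariant_load_sketch(ptr %p) {
  ; The empty metadata node !0 marks this load as invariant.
  %v = load i32, ptr %p, align 4, !invariant.load !0
  ret i32 %v
}

!0 = !{}
```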
The bug in the attributor was due to `AAMemoryBehavior` always identifying pointers obtained from `alloca`s as having no writes. I'm not entirely sure why `AAMemoryBehavior` behaves this way, but it seems to be because it limits the scope of an `alloca` to that single instruction (and, certainly, no memory writes occur within the `alloca` instruction itself). This patch simply adds a check that prevents loads from `alloca` pointers from being marked `!invariant.load`, since any well-defined program has to write to its stack pointers at some point.
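A minimal illustration of the excluded pattern (hypothetical IR, not taken from the patch): the stack slot is necessarily written before it is read, so its loads can never be invariant, even though the `alloca` instruction itself performs no write.

```llvm
define i32 @alloca_not_invariant(i32 %v) {
  ; The alloca instruction itself writes no memory (which is why
  ; AAMemoryBehavior sees no writes), but the slot is written below,
  ; so the load must not be tagged !invariant.load.
  %slot = alloca i32, align 4
  store i32 %v, ptr %slot, align 4
  %r = load i32, ptr %slot, align 4
  ret i32 %r
}
```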
@shiltian @krzysz00 (since I still can't request reviews explicitly).
---
Patch is 38.61 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/146584.diff
6 Files Affected:
- (modified) llvm/include/llvm/Transforms/IPO/Attributor.h (+41)
- (modified) llvm/lib/Transforms/IPO/Attributor.cpp (+2)
- (modified) llvm/lib/Transforms/IPO/AttributorAttributes.cpp (+343)
- (added) llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll (+431)
- (modified) llvm/test/Transforms/Attributor/dereferenceable-1.ll (-1)
- (modified) llvm/test/Transforms/Attributor/value-simplify-local-remote.ll (-8)
``````````diff
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index e6eb756df987d..f19f3292c4798 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -6335,6 +6335,47 @@ struct AAUnderlyingObjects : AbstractAttribute {
AA::ValueScope Scope = AA::Interprocedural) const = 0;
};
+/// An abstract interface for identifying pointers from which loads can be
+/// marked invariant.
+struct AAInvariantLoadPointer : public AbstractAttribute {
+ AAInvariantLoadPointer(const IRPosition &IRP) : AbstractAttribute(IRP) {}
+
+ /// See AbstractAttribute::isValidIRPositionForInit
+ static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
+ if (!IRP.getAssociatedType()->isPointerTy())
+ return false;
+
+ return AbstractAttribute::isValidIRPositionForInit(A, IRP);
+ }
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AAInvariantLoadPointer &createForPosition(const IRPosition &IRP,
+ Attributor &A);
+
+ /// Return true if the pointer's contents are known to remain invariant.
+ virtual bool isKnownInvariant() const = 0;
+ virtual bool isKnownLocallyInvariant() const = 0;
+
+ /// Return true if the pointer's contents are assumed to remain invariant.
+ virtual bool isAssumedInvariant() const = 0;
+ virtual bool isAssumedLocallyInvariant() const = 0;
+
+ /// See AbstractAttribute::getName().
+ StringRef getName() const override { return "AAInvariantLoadPointer"; }
+
+ /// See AbstractAttribute::getIdAddr().
+ const char *getIdAddr() const override { return &ID; }
+
+ /// This function should return true if the type of the \p AA is
+ /// AAInvariantLoadPointer
+ static bool classof(const AbstractAttribute *AA) {
+ return (AA->getIdAddr() == &ID);
+ }
+
+ /// Unique ID (due to the unique address).
+ static const char ID;
+};
+
/// An abstract interface for address space information.
struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> {
AAAddressSpace(const IRPosition &IRP, Attributor &A)
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index dac1f7a30c370..a2548258ddaf0 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -3612,6 +3612,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
if (SimplifyAllLoads)
getAssumedSimplified(IRPosition::value(I), nullptr,
UsedAssumedInformation, AA::Intraprocedural);
+ getOrCreateAAFor<AAInvariantLoadPointer>(
+ IRPosition::value(*LI->getPointerOperand()));
getOrCreateAAFor<AAAddressSpace>(
IRPosition::value(*LI->getPointerOperand()));
} else {
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 3799a696f67af..22dde04745bec 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -191,6 +191,7 @@ PIPE_OPERATOR(AAInterFnReachability)
PIPE_OPERATOR(AAPointerInfo)
PIPE_OPERATOR(AAAssumptionInfo)
PIPE_OPERATOR(AAUnderlyingObjects)
+PIPE_OPERATOR(AAInvariantLoadPointer)
PIPE_OPERATOR(AAAddressSpace)
PIPE_OPERATOR(AAAllocationInfo)
PIPE_OPERATOR(AAIndirectCallInfo)
@@ -12533,6 +12534,346 @@ struct AAIndirectCallInfoCallSite : public AAIndirectCallInfo {
};
} // namespace
+/// --------------------- Invariant Load Pointer -------------------------------
+namespace {
+
+struct AAInvariantLoadPointerImpl
+ : public StateWrapper<BitIntegerState<uint8_t, 15>,
+ AAInvariantLoadPointer> {
+
+ enum {
+ // pointer does not alias within the bounds of the function
+ IS_NOALIAS = 1 << 0,
+ // pointer is not involved in any effectful instructions within the bounds
+ // of the function
+ IS_NOEFFECT = 1 << 1,
+ // loads are invariant within the bounds of the function
+ IS_LOCALLY_INVARIANT = 1 << 2,
+ // memory lifetime is constrained within the bounds of the function
+ IS_LOCALLY_CONSTRAINED = 1 << 3,
+
+ IS_BEST_STATE = IS_NOALIAS | IS_NOEFFECT | IS_LOCALLY_INVARIANT |
+ IS_LOCALLY_CONSTRAINED,
+ };
+ static_assert(getBestState() == IS_BEST_STATE, "Unexpected best state");
+
+ using Base =
+ StateWrapper<BitIntegerState<uint8_t, 15>, AAInvariantLoadPointer>;
+
+ // the BitIntegerState is optimistic about IS_NOALIAS and IS_NOEFFECT, but
+ // pessimistic about IS_KNOWN_INVARIANT
+ AAInvariantLoadPointerImpl(const IRPosition &IRP, Attributor &A)
+ : Base(IRP) {}
+
+ bool isKnownInvariant() const final {
+ return isKnownLocallyInvariant() && isKnown(IS_LOCALLY_CONSTRAINED);
+ }
+
+ bool isKnownLocallyInvariant() const final {
+ if (isKnown(IS_LOCALLY_INVARIANT))
+ return true;
+ return isKnown(IS_NOALIAS | IS_NOEFFECT);
+ }
+
+ bool isAssumedInvariant() const final {
+ return isAssumedLocallyInvariant() && isAssumed(IS_LOCALLY_CONSTRAINED);
+ }
+
+ bool isAssumedLocallyInvariant() const final {
+ if (isAssumed(IS_LOCALLY_INVARIANT))
+ return true;
+ return isAssumed(IS_NOALIAS | IS_NOEFFECT);
+ }
+
+ ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+
+ Changed |= updateNoAlias(A);
+ if (requiresNoAlias() && !isAssumed(IS_NOALIAS))
+ return indicatePessimisticFixpoint();
+
+ Changed |= updateNoEffect(A);
+
+ Changed |= updateLocalInvariance(A);
+
+ return Changed;
+ }
+
+ ChangeStatus manifest(Attributor &A) override {
+ if (!isKnownInvariant())
+ return ChangeStatus::UNCHANGED;
+
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+ const Value *Ptr = &getAssociatedValue();
+ const auto TagInvariantLoads = [&](const Use &U, bool &) {
+ if (U.get() != Ptr)
+ return true;
+ auto *I = dyn_cast<Instruction>(U.getUser());
+ if (!I)
+ return true;
+
+ // Ensure that we are only changing uses from the corresponding callgraph
+ // SCC in the case that the AA isn't run on the entire module
+ if (!A.isRunOn(I->getFunction()))
+ return true;
+
+ if (I->hasMetadata(LLVMContext::MD_invariant_load))
+ return true;
+
+ if (auto *LI = dyn_cast<LoadInst>(I)) {
+ LI->setMetadata(LLVMContext::MD_invariant_load,
+ MDNode::get(LI->getContext(), {}));
+ Changed = ChangeStatus::CHANGED;
+ }
+ return true;
+ };
+
+ (void)A.checkForAllUses(TagInvariantLoads, *this, *Ptr);
+ return Changed;
+ }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr(Attributor *) const override {
+ if (isKnownInvariant())
+ return "load-invariant pointer";
+ return "non-invariant pointer";
+ }
+
+ /// See AbstractAttribute::trackStatistics().
+ void trackStatistics() const override {}
+
+private:
+ /// Indicate that noalias is required for the pointer to be invariant.
+ bool requiresNoAlias() const {
+ switch (getPositionKind()) {
+ default:
+ // Conservatively default to require noalias.
+ return true;
+ case IRP_FLOAT:
+ case IRP_RETURNED:
+ case IRP_CALL_SITE:
+ return false;
+ case IRP_CALL_SITE_RETURNED: {
+ const auto &CB = cast<CallBase>(getAnchorValue());
+ return !isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
+ &CB, /*MustPreserveNullness=*/false);
+ }
+ case IRP_ARGUMENT: {
+ const Function *F = getAssociatedFunction();
+ assert(F && "no associated function for argument");
+ return !isCallableCC(F->getCallingConv());
+ }
+ }
+ }
+
+ bool isExternal() const {
+ const Function *F = getAssociatedFunction();
+ if (!F)
+ return true;
+ return isCallableCC(F->getCallingConv()) &&
+ getPositionKind() != IRP_CALL_SITE_RETURNED;
+ }
+
+ ChangeStatus updateNoAlias(Attributor &A) {
+ if (isKnown(IS_NOALIAS) || !isAssumed(IS_NOALIAS))
+ return ChangeStatus::UNCHANGED;
+
+ // Try to use AANoAlias.
+ if (const auto *ANoAlias = A.getOrCreateAAFor<AANoAlias>(
+ getIRPosition(), this, DepClassTy::REQUIRED)) {
+ if (ANoAlias->isKnownNoAlias()) {
+ addKnownBits(IS_NOALIAS);
+ return ChangeStatus::CHANGED;
+ }
+
+ if (!ANoAlias->isAssumedNoAlias()) {
+ removeAssumedBits(IS_NOALIAS);
+ return ChangeStatus::CHANGED;
+ }
+
+ return ChangeStatus::UNCHANGED;
+ }
+
+ // Try to infer noalias from argument attribute, since it is applicable for
+ // the duration of the function.
+ if (const Argument *Arg = getAssociatedArgument()) {
+ if (Arg->hasNoAliasAttr()) {
+ addKnownBits(IS_NOALIAS);
+ return ChangeStatus::UNCHANGED;
+ }
+
+ // Noalias information is not provided, and cannot be inferred,
+ // so we conservatively assume the pointer aliases.
+ removeAssumedBits(IS_NOALIAS);
+ return ChangeStatus::CHANGED;
+ }
+
+ return ChangeStatus::UNCHANGED;
+ }
+
+ ChangeStatus updateNoEffect(Attributor &A) {
+ if (isKnown(IS_NOEFFECT) || !isAssumed(IS_NOEFFECT))
+ return ChangeStatus::UNCHANGED;
+
+ if (!getAssociatedFunction())
+ return indicatePessimisticFixpoint();
+
+ if (isa<AllocaInst>(&getAssociatedValue()))
+ return indicatePessimisticFixpoint();
+
+ const auto HasNoEffectLoads = [&](const Use &U, bool &) {
+ const auto *LI = dyn_cast<LoadInst>(U.getUser());
+ return !LI || !LI->mayHaveSideEffects();
+ };
+ if (!A.checkForAllUses(HasNoEffectLoads, *this, getAssociatedValue()))
+ return indicatePessimisticFixpoint();
+
+ if (const auto *AMemoryBehavior = A.getOrCreateAAFor<AAMemoryBehavior>(
+ getIRPosition(), this, DepClassTy::REQUIRED)) {
+ // For non-instructions, try to use AAMemoryBehavior to infer the readonly
+ // attribute
+ if (!AMemoryBehavior->isAssumedReadOnly())
+ return indicatePessimisticFixpoint();
+
+ if (AMemoryBehavior->isKnownReadOnly()) {
+ addKnownBits(IS_NOEFFECT);
+ return ChangeStatus::UNCHANGED;
+ }
+
+ return ChangeStatus::UNCHANGED;
+ }
+
+ if (const Argument *Arg = getAssociatedArgument()) {
+ if (Arg->onlyReadsMemory()) {
+ addKnownBits(IS_NOEFFECT);
+ return ChangeStatus::UNCHANGED;
+ }
+
+ // Readonly information is not provided, and cannot be inferred from
+ // AAMemoryBehavior.
+ return indicatePessimisticFixpoint();
+ }
+
+ return ChangeStatus::UNCHANGED;
+ }
+
+ ChangeStatus updateLocalInvariance(Attributor &A) {
+ if (isKnown(IS_LOCALLY_INVARIANT) || !isAssumed(IS_LOCALLY_INVARIANT))
+ return ChangeStatus::UNCHANGED;
+
+ // try to infer invariance from underlying objects
+ const auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(
+ getIRPosition(), this, DepClassTy::REQUIRED);
+ if (!AUO)
+ return ChangeStatus::UNCHANGED;
+
+ bool UsedAssumedInformation = false;
+ const auto IsLocallyInvariantLoadIfPointer = [&](const Value &V) {
+ if (!V.getType()->isPointerTy())
+ return true;
+ const auto *IsInvariantLoadPointer =
+ A.getOrCreateAAFor<AAInvariantLoadPointer>(IRPosition::value(V), this,
+ DepClassTy::REQUIRED);
+ // Conservatively fail if invariance cannot be inferred.
+ if (!IsInvariantLoadPointer)
+ return false;
+
+ if (IsInvariantLoadPointer->isKnownLocallyInvariant())
+ return true;
+ if (!IsInvariantLoadPointer->isAssumedLocallyInvariant())
+ return false;
+
+ UsedAssumedInformation = true;
+ return true;
+ };
+ if (!AUO->forallUnderlyingObjects(IsLocallyInvariantLoadIfPointer))
+ return indicatePessimisticFixpoint();
+
+ if (const auto *CB = dyn_cast<CallBase>(&getAnchorValue())) {
+ if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
+ CB, /*MustPreserveNullness=*/false)) {
+ for (const Value *Arg : CB->args()) {
+ if (!IsLocallyInvariantLoadIfPointer(*Arg))
+ return indicatePessimisticFixpoint();
+ }
+ }
+ }
+
+ if (!UsedAssumedInformation) {
+ // Pointer is known and not just assumed to be locally invariant.
+ addKnownBits(IS_LOCALLY_INVARIANT);
+ return ChangeStatus::CHANGED;
+ }
+
+ return ChangeStatus::UNCHANGED;
+ }
+};
+
+struct AAInvariantLoadPointerFloating final : AAInvariantLoadPointerImpl {
+ AAInvariantLoadPointerFloating(const IRPosition &IRP, Attributor &A)
+ : AAInvariantLoadPointerImpl(IRP, A) {}
+};
+
+struct AAInvariantLoadPointerReturned final : AAInvariantLoadPointerImpl {
+ AAInvariantLoadPointerReturned(const IRPosition &IRP, Attributor &A)
+ : AAInvariantLoadPointerImpl(IRP, A) {}
+
+ void initialize(Attributor &) override {
+ removeAssumedBits(IS_LOCALLY_CONSTRAINED);
+ }
+};
+
+struct AAInvariantLoadPointerCallSiteReturned final
+ : AAInvariantLoadPointerImpl {
+ AAInvariantLoadPointerCallSiteReturned(const IRPosition &IRP, Attributor &A)
+ : AAInvariantLoadPointerImpl(IRP, A) {}
+
+ void initialize(Attributor &A) override {
+ const Function *F = getAssociatedFunction();
+ assert(F && "no associated function for return from call");
+
+ if (!F->isDeclaration() && !F->isIntrinsic())
+ return AAInvariantLoadPointerImpl::initialize(A);
+
+ const auto &CB = cast<CallBase>(getAnchorValue());
+ if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
+ &CB, /*MustPreserveNullness=*/false))
+ return AAInvariantLoadPointerImpl::initialize(A);
+
+ if (F->onlyReadsMemory() && F->hasNoSync())
+ return AAInvariantLoadPointerImpl::initialize(A);
+
+ // At this point, the function is opaque, so we conservatively assume
+ // non-invariance.
+ indicatePessimisticFixpoint();
+ }
+};
+
+struct AAInvariantLoadPointerArgument final : AAInvariantLoadPointerImpl {
+ AAInvariantLoadPointerArgument(const IRPosition &IRP, Attributor &A)
+ : AAInvariantLoadPointerImpl(IRP, A) {}
+
+ void initialize(Attributor &) override {
+ const Function *F = getAssociatedFunction();
+ assert(F && "no associated function for argument");
+
+ if (!isCallableCC(F->getCallingConv())) {
+ addKnownBits(IS_LOCALLY_CONSTRAINED);
+ return;
+ }
+
+ if (!F->hasLocalLinkage())
+ removeAssumedBits(IS_LOCALLY_CONSTRAINED);
+ }
+};
+
+struct AAInvariantLoadPointerCallSiteArgument final
+ : AAInvariantLoadPointerImpl {
+ AAInvariantLoadPointerCallSiteArgument(const IRPosition &IRP, Attributor &A)
+ : AAInvariantLoadPointerImpl(IRP, A) {}
+};
+} // namespace
+
/// ------------------------ Address Space ------------------------------------
namespace {
@@ -13038,6 +13379,7 @@ const char AAInterFnReachability::ID = 0;
const char AAPointerInfo::ID = 0;
const char AAAssumptionInfo::ID = 0;
const char AAUnderlyingObjects::ID = 0;
+const char AAInvariantLoadPointer::ID = 0;
const char AAAddressSpace::ID = 0;
const char AAAllocationInfo::ID = 0;
const char AAIndirectCallInfo::ID = 0;
@@ -13172,6 +13514,7 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialValues)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFPClass)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPointerInfo)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAInvariantLoadPointer)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAddressSpace)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAllocationInfo)
diff --git a/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll b/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll
new file mode 100644
index 0000000000000..d5aa6b10b5add
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll
@@ -0,0 +1,431 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=attributor %s -S | FileCheck %s --check-prefix=AMDGCN
+
+@G = addrspace(1) global i32 zeroinitializer, align 4
+declare void @clobber(i32) #0
+declare void @clobber.p5(ptr addrspace(5)) #0
+declare ptr addrspace(1) @get_ptr() #0
+declare noalias ptr addrspace(1) @get_noalias_ptr() #0
+declare noalias ptr addrspace(1) @get_untouched_ptr() #1
+
+define void @test_nonkernel(ptr addrspace(1) noalias %ptr) {
+; AMDGCN-LABEL: define void @test_nonkernel(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
+; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7:[0-9]+]]
+; AMDGCN-NEXT: ret void
+;
+ %val = load i32, ptr addrspace(1) %ptr, align 4
+ ;; may not be !invariant.load, as the caller may modify %ptr
+ call void @clobber(i32 %val)
+ ret void
+}
+
+define amdgpu_kernel void @test_plain(ptr addrspace(1) %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_plain(
+; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
+; AMDGCN-NEXT: ret void
+;
+ %val = load i32, ptr addrspace(1) %ptr, align 4
+ ;; may not be !invariant.load, as %ptr may alias a pointer in @clobber
+ call void @clobber(i32 %val)
+ ret void
+}
+
+define amdgpu_kernel void @test_noalias_ptr(ptr addrspace(1) noalias %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_noalias_ptr(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0:![0-9]+]]
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
+; AMDGCN-NEXT: ret void
+;
+ %val = load i32, ptr addrspace(1) %ptr, align 4
+ call void @clobber(i32 %val)
+ ret void
+}
+
+define amdgpu_kernel void @test_gep(ptr addrspace(1) %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_gep(
+; AMDGCN-SAME: ptr addrspace(1) nofree readonly align 4 captures(none) [[PTR:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT: [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[PTR]], i32 4
+; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
+; AMDGCN-NEXT: ret void
+;
+ %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
+ %val = load i32, ptr addrspace(1) %gep, align 4
+ call void @clobber(i32 %val)
+ ret void
+}
+
+define amdgpu_kernel void @test_noalias_gep(ptr addrspace(1) noalias %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_noalias_gep(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly align 4 captures(none) [[PTR:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT: [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[PTR]], i32 4
+; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4, !invariant.load [[META0]]
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
+; AMDGCN-NEXT: ret void
+;
+ %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
+ %val = load i32, ptr addrspace(1) %gep, align 4
+ call void @clobber(i32 %val)
+ ret void
+}
+
+define amdgpu_kernel void @test_swap(ptr addrspace(1) noalias %ptr, i32 inreg %swap) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_swap(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[SWAP:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT: store i32 [[SWAP]], ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR7]]
+; AMDGCN-NEXT: ret void
+;
+ %val = load i32, ptr addrspace(1) %ptr, align 4
+ ;; cannot be !invariant.load due to the write to %ptr
+ store i32 %swap, ptr addrspace(1) %ptr, align 4
+ call void @clobber(i32 %val)
+ ret void
+}
+
+define amdgpu_kernel void @test_volatile(ptr add...
[truncated]
``````````
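To exercise the new attributor on a standalone file, the added test's RUN line is the template; a minimal sketch (`@example` is hypothetical, modeled on `@test_noalias_ptr` above):

```llvm
; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=attributor %s -S
define amdgpu_kernel void @example(ptr addrspace(1) noalias %ptr, ptr addrspace(1) %out) {
  ; %ptr is a noalias argument of a non-callable kernel and is never
  ; written here, so this load is expected to be tagged !invariant.load.
  %val = load i32, ptr addrspace(1) %ptr, align 4
  store i32 %val, ptr addrspace(1) %out, align 4
  ret void
}
```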
https://github.com/llvm/llvm-project/pull/146584