[llvm] [IPO] Added attributor for identifying invariant loads (PR #141800)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 3 14:58:21 PDT 2025
https://github.com/zGoldthorpe updated https://github.com/llvm/llvm-project/pull/141800
>From fc269c14e24b6a9731ce354fb1f1e682cb78d53e Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <zgoldtho at ualberta.ca>
Date: Wed, 28 May 2025 11:05:47 -0500
Subject: [PATCH 1/7] Added attributor for identifying `!invariant.load`s.
---
llvm/include/llvm/Transforms/IPO/Attributor.h | 38 +++
llvm/lib/Transforms/IPO/Attributor.cpp | 2 +
.../Transforms/IPO/AttributorAttributes.cpp | 245 ++++++++++++++++++
.../multiple-offsets-pointer-info.ll | 8 +-
.../Attributor/tag-invariant-loads.ll | 220 ++++++++++++++++
5 files changed, 509 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/Transforms/Attributor/tag-invariant-loads.ll
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index c628bbb007230..53fa7a04dc5b5 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -6289,6 +6289,44 @@ struct AAUnderlyingObjects : AbstractAttribute {
AA::ValueScope Scope = AA::Interprocedural) const = 0;
};
+/// An abstract interface for identifying pointers from which loads can be
+/// marked invariant.
+struct AAInvariantLoadPointer : public AbstractAttribute {
+ AAInvariantLoadPointer(const IRPosition &IRP) : AbstractAttribute(IRP) {}
+
+ /// See AbstractAttribute::isValidIRPositionForInit
+ static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
+ if (!IRP.getAssociatedType()->isPointerTy())
+ return false;
+ return AbstractAttribute::isValidIRPositionForInit(A, IRP);
+ }
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AAInvariantLoadPointer &createForPosition(const IRPosition &IRP,
+ Attributor &A);
+
+ /// Return true if the pointer's contents are known to remain invariant.
+ virtual bool isKnownInvariant() const = 0;
+
+ /// Return true if the pointer's contents are assumed to remain invariant.
+ virtual bool isAssumedInvariant() const = 0;
+
+ /// See AbstractAttribute::getName().
+ StringRef getName() const override { return "AAInvariantLoadPointer"; }
+
+ /// See AbstractAttribute::getIdAddr().
+ const char *getIdAddr() const override { return &ID; }
+
+ /// This function should return true if the type of the \p AA is
+ /// AAInvariantLoadPointer
+ static bool classof(const AbstractAttribute *AA) {
+ return (AA->getIdAddr() == &ID);
+ }
+
+ /// Unique ID (due to the unique address).
+ static const char ID;
+};
+
/// An abstract interface for address space information.
struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> {
AAAddressSpace(const IRPosition &IRP, Attributor &A)
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index cbdbf9ae1494d..1dc576656d12a 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -3620,6 +3620,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
if (SimplifyAllLoads)
getAssumedSimplified(IRPosition::value(I), nullptr,
UsedAssumedInformation, AA::Intraprocedural);
+ getOrCreateAAFor<AAInvariantLoadPointer>(
+ IRPosition::value(*LI->getPointerOperand()));
getOrCreateAAFor<AAAddressSpace>(
IRPosition::value(*LI->getPointerOperand()));
} else {
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 470c5308edca4..f0647747d6c7f 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -191,6 +191,7 @@ PIPE_OPERATOR(AAInterFnReachability)
PIPE_OPERATOR(AAPointerInfo)
PIPE_OPERATOR(AAAssumptionInfo)
PIPE_OPERATOR(AAUnderlyingObjects)
+PIPE_OPERATOR(AAInvariantLoadPointer)
PIPE_OPERATOR(AAAddressSpace)
PIPE_OPERATOR(AAAllocationInfo)
PIPE_OPERATOR(AAIndirectCallInfo)
@@ -12534,6 +12535,248 @@ struct AAIndirectCallInfoCallSite : public AAIndirectCallInfo {
};
} // namespace
+/// --------------------- Invariant Load Pointer -------------------------------
+namespace {
+
+struct AAInvariantLoadPointerImpl
+ : public StateWrapper<BitIntegerState<uint8_t, 7>, AAInvariantLoadPointer,
+ uint8_t> {
+ // load invariance is implied by, but not equivalent to IS_NOALIAS |
+ // IS_READONLY, as load invariance is also implied by all underlying objects
+ // being load invariant.
+ //
+ // IS_INVARIANT is set to indicate that the contents of the pointer are
+ // *known* to be invariant.
+ enum {
+ IS_INVARIANT = 1 << 0,
+ IS_NOALIAS = 1 << 1,
+ IS_READONLY = 1 << 2,
+ };
+ static_assert(getBestState() == (IS_INVARIANT | IS_NOALIAS | IS_READONLY),
+ "Unexpected best state!");
+
+ using Base = StateWrapper<BitIntegerState<uint8_t, 7>, AAInvariantLoadPointer,
+ uint8_t>;
+
+ // the BitIntegerState is optimistic about noalias and readonly, but
+ // pessimistic about invariance
+ AAInvariantLoadPointerImpl(const IRPosition &IRP, Attributor &A)
+ : Base(IRP, IS_NOALIAS | IS_READONLY) {}
+
+ void initialize(Attributor &A) final {
+ // conservatively assume that the pointer's contents are not invariant,
+ // until proven otherwise.
+ removeAssumedBits(IS_INVARIANT);
+ }
+
+ bool isKnownInvariant() const final {
+ return isKnown(IS_INVARIANT) || isKnown(IS_NOALIAS | IS_READONLY);
+ }
+
+ bool isAssumedInvariant() const final {
+ return isAssumed(IS_INVARIANT) || isAssumed(IS_NOALIAS | IS_READONLY);
+ }
+
+ ChangeStatus updateImpl(Attributor &A) override {
+ if (isKnownInvariant())
+ return ChangeStatus::UNCHANGED;
+
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+
+ Changed |= updateNoAlias(A);
+ Changed |= updateReadOnly(A);
+
+ bool UsedAssumedInformation = false;
+ const auto IsInvariantLoadIfPointer = [&](const Value &V) {
+ if (!V.getType()->isPointerTy())
+ return true;
+ const auto *IsInvariantLoadPointer =
+ A.getOrCreateAAFor<AAInvariantLoadPointer>(IRPosition::value(V), this,
+ DepClassTy::REQUIRED);
+ if (IsInvariantLoadPointer->isKnownInvariant())
+ return true;
+ if (!IsInvariantLoadPointer->isAssumedInvariant())
+ return false;
+
+ UsedAssumedInformation = true;
+ return true;
+ };
+
+ const auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(
+ getIRPosition(), this, DepClassTy::REQUIRED);
+
+ if (!AUO->forallUnderlyingObjects(IsInvariantLoadIfPointer)) {
+ removeAssumedBits(IS_INVARIANT);
+ return ChangeStatus::CHANGED;
+ }
+
+ if (!UsedAssumedInformation) {
+ // pointer is known (not assumed) to be invariant
+ addKnownBits(IS_INVARIANT);
+ return ChangeStatus::CHANGED;
+ }
+
+ return Changed;
+ }
+
+ ChangeStatus manifest(Attributor &A) override {
+ if (!isKnownInvariant())
+ return ChangeStatus::UNCHANGED;
+
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+ Value *Ptr = &getAssociatedValue();
+ const auto TagInvariantLoads = [&](const Use &U, bool &) {
+ if (U.get() != Ptr)
+ return true;
+ auto *I = dyn_cast<Instruction>(U.getUser());
+ if (!I)
+ return true;
+
+ // Ensure that we are only changing uses from the corresponding callgraph
+ // SCC in the case that the AA isn't run on the entire module
+ if (!A.isRunOn(I->getFunction()))
+ return true;
+
+ if (I->hasMetadata(LLVMContext::MD_invariant_load))
+ return true;
+
+ if (auto *LI = dyn_cast<LoadInst>(I)) {
+ if (LI->isVolatile() || LI->isAtomic())
+ return true;
+
+ LI->setMetadata(LLVMContext::MD_invariant_load,
+ MDNode::get(LI->getContext(), {}));
+ Changed = ChangeStatus::CHANGED;
+ }
+ return true;
+ };
+
+ (void)A.checkForAllUses(TagInvariantLoads, *this, *Ptr);
+ return Changed;
+ }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr(Attributor *) const override {
+ std::string Str;
+ raw_string_ostream OS(Str);
+ OS << "load invariant pointer: " << isKnown() << '\n';
+ return Str;
+ }
+
+ /// See AbstractAttribute::trackStatistics().
+ void trackStatistics() const override {}
+
+protected:
+ ChangeStatus updateNoAlias(Attributor &A) {
+ if (isKnown(IS_NOALIAS) || !isAssumed(IS_NOALIAS))
+ return ChangeStatus::UNCHANGED;
+
+ const auto *ANoAlias = A.getOrCreateAAFor<AANoAlias>(getIRPosition(), this,
+ DepClassTy::REQUIRED);
+ if (!ANoAlias)
+ return tryInferNoAlias(A);
+
+ if (!ANoAlias->isAssumedNoAlias()) {
+ removeAssumedBits(IS_NOALIAS);
+ return ChangeStatus::CHANGED;
+ }
+ if (ANoAlias->isKnownNoAlias())
+ addKnownBits(IS_NOALIAS);
+
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// Fallback method if updateNoAlias fails to infer noalias information from
+ /// AANoAlias.
+ virtual ChangeStatus tryInferNoAlias(Attributor &A) {
+ return ChangeStatus::UNCHANGED;
+ }
+
+ ChangeStatus updateReadOnly(Attributor &A) {
+ if (isKnown(IS_READONLY) || !isAssumed(IS_READONLY))
+ return ChangeStatus::UNCHANGED;
+
+ // AAMemoryBehavior may crash if value is global
+ if (!getAssociatedFunction())
+ return tryInferReadOnly(A);
+
+ const auto *AMemoryBehavior = A.getOrCreateAAFor<AAMemoryBehavior>(
+ getIRPosition(), this, DepClassTy::REQUIRED);
+ if (!AMemoryBehavior)
+ return tryInferReadOnly(A);
+
+ if (!AMemoryBehavior->isAssumedReadOnly()) {
+ removeAssumedBits(IS_READONLY);
+ return ChangeStatus::CHANGED;
+ }
+ if (AMemoryBehavior->isKnownReadOnly())
+ addKnownBits(IS_READONLY);
+
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// Fallback method if updateReadOnly fails to infer readonly information from
+ /// AAMemoryBehavior.
+ virtual ChangeStatus tryInferReadOnly(Attributor &A) {
+ return ChangeStatus::UNCHANGED;
+ }
+};
+
+struct AAInvariantLoadPointerFloating final : AAInvariantLoadPointerImpl {
+ AAInvariantLoadPointerFloating(const IRPosition &IRP, Attributor &A)
+ : AAInvariantLoadPointerImpl(IRP, A) {}
+};
+
+struct AAInvariantLoadPointerReturned final : AAInvariantLoadPointerImpl {
+ AAInvariantLoadPointerReturned(const IRPosition &IRP, Attributor &A)
+ : AAInvariantLoadPointerImpl(IRP, A) {}
+};
+
+struct AAInvariantLoadPointerCallSiteReturned final
+ : AAInvariantLoadPointerImpl {
+ AAInvariantLoadPointerCallSiteReturned(const IRPosition &IRP, Attributor &A)
+ : AAInvariantLoadPointerImpl(IRP, A) {}
+};
+
+struct AAInvariantLoadPointerArgument final : AAInvariantLoadPointerImpl {
+ AAInvariantLoadPointerArgument(const IRPosition &IRP, Attributor &A)
+ : AAInvariantLoadPointerImpl(IRP, A) {}
+
+protected:
+ ChangeStatus tryInferNoAlias(Attributor &A) override {
+ const auto *Arg = getAssociatedArgument();
+ if (Arg->hasNoAliasAttr()) {
+ addKnownBits(IS_NOALIAS);
+ return ChangeStatus::UNCHANGED;
+ }
+
+ // noalias information is not provided, and cannot be inferred from
+ // AANoAlias
+ removeAssumedBits(IS_NOALIAS);
+ return ChangeStatus::CHANGED;
+ }
+
+ ChangeStatus tryInferReadOnly(Attributor &A) override {
+ const auto *Arg = getAssociatedArgument();
+ if (Arg->onlyReadsMemory()) {
+ addKnownBits(IS_READONLY);
+ return ChangeStatus::UNCHANGED;
+ }
+
+ // readonly information is not provided, and cannot be inferred from
+ // AAMemoryBehavior
+ removeAssumedBits(IS_READONLY);
+ return ChangeStatus::CHANGED;
+ }
+};
+
+struct AAInvariantLoadPointerCallSiteArgument final
+ : AAInvariantLoadPointerImpl {
+ AAInvariantLoadPointerCallSiteArgument(const IRPosition &IRP, Attributor &A)
+ : AAInvariantLoadPointerImpl(IRP, A) {}
+};
+} // namespace
+
/// ------------------------ Address Space ------------------------------------
namespace {
@@ -13031,6 +13274,7 @@ const char AAInterFnReachability::ID = 0;
const char AAPointerInfo::ID = 0;
const char AAAssumptionInfo::ID = 0;
const char AAUnderlyingObjects::ID = 0;
+const char AAInvariantLoadPointer::ID = 0;
const char AAAddressSpace::ID = 0;
const char AAAllocationInfo::ID = 0;
const char AAIndirectCallInfo::ID = 0;
@@ -13165,6 +13409,7 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialValues)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFPClass)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPointerInfo)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAInvariantLoadPointer)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAddressSpace)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAllocationInfo)
diff --git a/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll b/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll
index f04ac4d73340f..9e58a35107491 100644
--- a/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll
@@ -10,7 +10,7 @@ define i8 @select_offsets_simplifiable_1(i1 %cnd1, i1 %cnd2) {
; CHECK-LABEL: define {{[^@]+}}@select_offsets_simplifiable_1
; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[BYTES:%.*]] = call ptr @calloc(i64 noundef 1024, i64 noundef 1)
+; CHECK-NEXT: [[BYTES:%.*]] = call noalias ptr @calloc(i64 noundef 1024, i64 noundef 1)
; CHECK-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 23
; CHECK-NEXT: store i8 23, ptr [[GEP23]], align 4
; CHECK-NEXT: [[GEP29:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 29
@@ -190,7 +190,7 @@ define i8 @select_offsets_not_simplifiable_3(i1 %cnd1, i1 %cnd2) {
; CHECK-LABEL: define {{[^@]+}}@select_offsets_not_simplifiable_3
; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[BYTES:%.*]] = call ptr @calloc(i64 noundef 1024, i64 noundef 1)
+; CHECK-NEXT: [[BYTES:%.*]] = call noalias ptr @calloc(i64 noundef 1024, i64 noundef 1)
; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7
; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 [[SEL1]]
@@ -214,7 +214,7 @@ define i8 @select_offsets_not_simplifiable_4(i1 %cnd1, i1 %cnd2) {
; CHECK-LABEL: define {{[^@]+}}@select_offsets_not_simplifiable_4
; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[BYTES:%.*]] = call ptr @calloc(i64 noundef 1024, i64 noundef 1)
+; CHECK-NEXT: [[BYTES:%.*]] = call noalias ptr @calloc(i64 noundef 1024, i64 noundef 1)
; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7
; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 [[SEL1]]
@@ -445,7 +445,7 @@ define i8 @phi_gep_not_simplifiable_2(i1 %cnd1, i1 %cnd2) {
; CHECK-LABEL: define {{[^@]+}}@phi_gep_not_simplifiable_2
; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[BYTES:%.*]] = call ptr @calloc(i64 noundef 1024, i64 noundef 1)
+; CHECK-NEXT: [[BYTES:%.*]] = call noalias ptr @calloc(i64 noundef 1024, i64 noundef 1)
; CHECK-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 23
; CHECK-NEXT: br i1 [[CND1]], label [[THEN:%.*]], label [[ELSE:%.*]]
; CHECK: then:
diff --git a/llvm/test/Transforms/Attributor/tag-invariant-loads.ll b/llvm/test/Transforms/Attributor/tag-invariant-loads.ll
new file mode 100644
index 0000000000000..6df07a0d68bee
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/tag-invariant-loads.ll
@@ -0,0 +1,220 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=attributor %s -S | FileCheck %s
+
+@G = global i32 zeroinitializer, align 4
+
+declare ptr @get_ptr()
+declare noalias ptr @get_noalias_ptr()
+
+define i32 @test_plain(ptr %ptr) {
+; CHECK-LABEL: define i32 @test_plain(
+; CHECK-SAME: ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+ %val = load i32, ptr %ptr, align 4
+ ret i32 %val
+}
+
+define i32 @test_noalias_ptr(ptr noalias %ptr) {
+; CHECK-LABEL: define i32 @test_noalias_ptr(
+; CHECK-SAME: ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0:![0-9]+]]
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+ %val = load i32, ptr %ptr, align 4
+ ret i32 %val
+}
+
+define i32 @test_swap(ptr noalias %ptr, i32 %write) {
+; CHECK-LABEL: define i32 @test_swap(
+; CHECK-SAME: ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[PTR:%.*]], i32 [[WRITE:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-NEXT: store i32 [[WRITE]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+ %val = load i32, ptr %ptr, align 4
+ store i32 %write, ptr %ptr, align 4
+ ret i32 %val
+}
+
+define i32 @test_volatile_load(ptr noalias %ptr) {
+; CHECK-LABEL: define i32 @test_volatile_load(
+; CHECK-SAME: ptr noalias nofree noundef align 4 [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT: [[VAL:%.*]] = load volatile i32, ptr [[PTR]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+ %val = load volatile i32, ptr %ptr, align 4
+ ret i32 %val
+}
+
+define i32 @test_atomic_load(ptr noalias %ptr) {
+; CHECK-LABEL: define i32 @test_atomic_load(
+; CHECK-SAME: ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VAL:%.*]] = load atomic i32, ptr [[PTR]] unordered, align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+ %val = load atomic i32, ptr %ptr unordered, align 4
+ ret i32 %val
+}
+
+define i32 @test_atomic_volatile_load(ptr noalias %ptr) {
+; CHECK-LABEL: define i32 @test_atomic_volatile_load(
+; CHECK-SAME: ptr noalias nofree noundef align 4 [[PTR:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[VAL:%.*]] = load atomic volatile i32, ptr [[PTR]] unordered, align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+ %val = load atomic volatile i32, ptr %ptr unordered, align 4
+ ret i32 %val
+}
+
+define i32 @test_global() {
+; CHECK-LABEL: define i32 @test_global(
+; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr @G, align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+ %val = load i32, ptr @G, align 4
+ ret i32 %val
+}
+
+define internal i32 @test_internal_noalias_load(ptr %ptr) {
+; CHECK-LABEL: define internal i32 @test_internal_noalias_load(
+; CHECK-SAME: ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0]]
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+ %val = load i32, ptr %ptr, align 4
+ ret i32 %val
+}
+
+define i32 @test_call_internal_noalias(ptr noalias %ptr) {
+; CHECK-LABEL: define i32 @test_call_internal_noalias(
+; CHECK-SAME: ptr noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VAL:%.*]] = call i32 @test_internal_noalias_load(ptr noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR4:[0-9]+]]
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+ %val = call i32 @test_internal_noalias_load(ptr %ptr)
+ ret i32 %val
+}
+
+define internal i32 @test_internal_load(ptr %ptr) {
+; CHECK-LABEL: define internal i32 @test_internal_load(
+; CHECK-SAME: ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+ %val = load i32, ptr %ptr, align 4
+ ret i32 %val
+}
+
+define i32 @test_call_internal(ptr %ptr) {
+; CHECK-LABEL: define i32 @test_call_internal(
+; CHECK-SAME: ptr nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VAL:%.*]] = call i32 @test_internal_load(ptr nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR4]]
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+ %val = call i32 @test_internal_load(ptr %ptr)
+ ret i32 %val
+}
+
+define i32 @test_call_ptr() {
+; CHECK-LABEL: define i32 @test_call_ptr() {
+; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_ptr()
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+ %ptr = call ptr @get_ptr()
+ %val = load i32, ptr %ptr, align 4
+ ret i32 %val
+}
+
+define i32 @test_call_noalias_ptr() {
+; CHECK-LABEL: define i32 @test_call_noalias_ptr() {
+; CHECK-NEXT: [[PTR:%.*]] = call noalias ptr @get_noalias_ptr()
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0]]
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+ %ptr = call ptr @get_noalias_ptr()
+ %val = load i32, ptr %ptr, align 4
+ ret i32 %val
+}
+
+define i32 @test_selected_load(i1 %cond, ptr noalias %ptr.true, ptr noalias %ptr.false) {
+; CHECK-LABEL: define i32 @test_selected_load(
+; CHECK-SAME: i1 [[COND:%.*]], ptr noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr [[PTR_TRUE]], ptr [[PTR_FALSE]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0]]
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+ %ptr = select i1 %cond, ptr %ptr.true, ptr %ptr.false
+ %val = load i32, ptr %ptr, align 4
+ ret i32 %val
+}
+
+define i32 @test_selected_load_partial_noalias(i1 %cond, ptr noalias %ptr.true, ptr %ptr.false) {
+; CHECK-LABEL: define i32 @test_selected_load_partial_noalias(
+; CHECK-SAME: i1 [[COND:%.*]], ptr noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr [[PTR_TRUE]], ptr [[PTR_FALSE]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+ %ptr = select i1 %cond, ptr %ptr.true, ptr %ptr.false
+ %val = load i32, ptr %ptr, align 4
+ ret i32 %val
+}
+
+define i32 @test_branch_load(i1 %cond, ptr noalias %ptr.true, ptr noalias %ptr.false) {
+; CHECK-LABEL: define i32 @test_branch_load(
+; CHECK-SAME: i1 noundef [[COND:%.*]], ptr noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]]
+; CHECK: [[TRUE]]:
+; CHECK-NEXT: br label %[[FINISH:.*]]
+; CHECK: [[FALSE]]:
+; CHECK-NEXT: br label %[[FINISH]]
+; CHECK: [[FINISH]]:
+; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0]]
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ br i1 %cond, label %true, label %false
+true:
+ br label %finish
+false:
+ br label %finish
+finish:
+ %ptr = phi ptr [ %ptr.true, %true ], [ %ptr.false, %false ]
+ %val = load i32, ptr %ptr, align 4
+ ret i32 %val
+}
+
+define i32 @test_branch_load_partial_noalias(i1 %cond, ptr noalias %ptr.true, ptr %ptr.false) {
+; CHECK-LABEL: define i32 @test_branch_load_partial_noalias(
+; CHECK-SAME: i1 noundef [[COND:%.*]], ptr noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]]
+; CHECK: [[TRUE]]:
+; CHECK-NEXT: br label %[[FINISH:.*]]
+; CHECK: [[FALSE]]:
+; CHECK-NEXT: br label %[[FINISH]]
+; CHECK: [[FINISH]]:
+; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ br i1 %cond, label %true, label %false
+true:
+ br label %finish
+false:
+ br label %finish
+finish:
+ %ptr = phi ptr [ %ptr.true, %true ], [ %ptr.false, %false ]
+ %val = load i32, ptr %ptr, align 4
+ ret i32 %val
+}
+;.
+; CHECK: [[META0]] = !{}
+;.
>From e095a93c82fad7530d152b1888131feb1d1133f4 Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <zgoldtho at ualberta.ca>
Date: Wed, 28 May 2025 12:42:15 -0500
Subject: [PATCH 2/7] Incorporated feedback
---
llvm/include/llvm/Transforms/IPO/Attributor.h | 1 +
.../Transforms/IPO/AttributorAttributes.cpp | 128 ++++---
.../multiple-offsets-pointer-info.ll | 8 +-
.../Attributor/tag-invariant-loads.ll | 357 ++++++++++--------
4 files changed, 268 insertions(+), 226 deletions(-)
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 53fa7a04dc5b5..38996bb051328 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -6298,6 +6298,7 @@ struct AAInvariantLoadPointer : public AbstractAttribute {
static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
if (!IRP.getAssociatedType()->isPointerTy())
return false;
+
return AbstractAttribute::isValidIRPositionForInit(A, IRP);
}
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index f0647747d6c7f..dec36b3e7dcb3 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12579,7 +12579,7 @@ struct AAInvariantLoadPointerImpl
ChangeStatus updateImpl(Attributor &A) override {
if (isKnownInvariant())
- return ChangeStatus::UNCHANGED;
+ return indicateOptimisticFixpoint();
ChangeStatus Changed = ChangeStatus::UNCHANGED;
@@ -12605,15 +12605,13 @@ struct AAInvariantLoadPointerImpl
const auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(
getIRPosition(), this, DepClassTy::REQUIRED);
- if (!AUO->forallUnderlyingObjects(IsInvariantLoadIfPointer)) {
- removeAssumedBits(IS_INVARIANT);
- return ChangeStatus::CHANGED;
- }
+ if (!AUO->forallUnderlyingObjects(IsInvariantLoadIfPointer))
+ return indicatePessimisticFixpoint();
if (!UsedAssumedInformation) {
// pointer is known (not assumed) to be invariant
addKnownBits(IS_INVARIANT);
- return ChangeStatus::CHANGED;
+ return indicateOptimisticFixpoint() | Changed;
}
return Changed;
@@ -12671,24 +12669,44 @@ struct AAInvariantLoadPointerImpl
if (isKnown(IS_NOALIAS) || !isAssumed(IS_NOALIAS))
return ChangeStatus::UNCHANGED;
- const auto *ANoAlias = A.getOrCreateAAFor<AANoAlias>(getIRPosition(), this,
- DepClassTy::REQUIRED);
- if (!ANoAlias)
- return tryInferNoAlias(A);
+ const auto *F = getAssociatedFunction();
- if (!ANoAlias->isAssumedNoAlias()) {
+ if (F && isCallableCC(F->getCallingConv())) {
+ // program-wide alias information cannot be inferred
removeAssumedBits(IS_NOALIAS);
return ChangeStatus::CHANGED;
}
- if (ANoAlias->isKnownNoAlias())
- addKnownBits(IS_NOALIAS);
- return ChangeStatus::UNCHANGED;
- }
+ // try to use AANoAlias
+ if (const auto *ANoAlias = A.getOrCreateAAFor<AANoAlias>(
+ getIRPosition(), this, DepClassTy::REQUIRED)) {
+ if (ANoAlias->isKnownNoAlias()) {
+ addKnownBits(IS_NOALIAS);
+ return ChangeStatus::UNCHANGED;
+ }
+
+ if (!ANoAlias->isAssumedNoAlias()) {
+ removeAssumedBits(IS_NOALIAS);
+ return ChangeStatus::CHANGED;
+ }
+
+ return ChangeStatus::UNCHANGED;
+ }
+
+ // if the function is not callable, try to infer noalias from argument
+ // attribute, since it is applicable for the duration of the function
+ if (const auto *Arg = getAssociatedArgument()) {
+ if (Arg->hasNoAliasAttr()) {
+ addKnownBits(IS_NOALIAS);
+ return ChangeStatus::UNCHANGED;
+ }
+
+ // noalias information is not provided, and cannot be inferred,
+ // so we conservatively assume the pointer aliases.
+ removeAssumedBits(IS_NOALIAS);
+ return ChangeStatus::CHANGED;
+ }
- /// Fallback method if updateNoAlias fails to infer noalias information from
- /// AANoAlias.
- virtual ChangeStatus tryInferNoAlias(Attributor &A) {
return ChangeStatus::UNCHANGED;
}
@@ -12696,28 +12714,45 @@ struct AAInvariantLoadPointerImpl
if (isKnown(IS_READONLY) || !isAssumed(IS_READONLY))
return ChangeStatus::UNCHANGED;
- // AAMemoryBehavior may crash if value is global
- if (!getAssociatedFunction())
- return tryInferReadOnly(A);
+ const auto *F = getAssociatedFunction();
- const auto *AMemoryBehavior = A.getOrCreateAAFor<AAMemoryBehavior>(
- getIRPosition(), this, DepClassTy::REQUIRED);
- if (!AMemoryBehavior)
- return tryInferReadOnly(A);
+ if (!F)
+ return ChangeStatus::UNCHANGED;
- if (!AMemoryBehavior->isAssumedReadOnly()) {
+ if (isCallableCC(F->getCallingConv())) {
+ // readonly attribute is only useful if applicable program-wide
removeAssumedBits(IS_READONLY);
return ChangeStatus::CHANGED;
}
- if (AMemoryBehavior->isKnownReadOnly())
- addKnownBits(IS_READONLY);
- return ChangeStatus::UNCHANGED;
- }
+ // try to use AAMemoryBehavior to infer readonly attribute
+ if (const auto *AMemoryBehavior = A.getOrCreateAAFor<AAMemoryBehavior>(
+ getIRPosition(), this, DepClassTy::REQUIRED)) {
+ if (!AMemoryBehavior->isAssumedReadOnly()) {
+ removeAssumedBits(IS_READONLY);
+ return ChangeStatus::CHANGED;
+ }
+
+ if (AMemoryBehavior->isKnownReadOnly()) {
+ addKnownBits(IS_READONLY);
+ return ChangeStatus::UNCHANGED;
+ }
+
+ return ChangeStatus::UNCHANGED;
+ }
+
+ if (const auto *Arg = getAssociatedArgument()) {
+ if (Arg->onlyReadsMemory()) {
+ addKnownBits(IS_READONLY);
+ return ChangeStatus::UNCHANGED;
+ }
+
+ // readonly information is not provided, and cannot be inferred from
+ // AAMemoryBehavior
+ removeAssumedBits(IS_READONLY);
+ return ChangeStatus::CHANGED;
+ }
- /// Fallback method if updateReadOnly fails to infer readonly information from
- /// AAMemoryBehavior.
- virtual ChangeStatus tryInferReadOnly(Attributor &A) {
return ChangeStatus::UNCHANGED;
}
};
@@ -12741,33 +12776,6 @@ struct AAInvariantLoadPointerCallSiteReturned final
struct AAInvariantLoadPointerArgument final : AAInvariantLoadPointerImpl {
AAInvariantLoadPointerArgument(const IRPosition &IRP, Attributor &A)
: AAInvariantLoadPointerImpl(IRP, A) {}
-
-protected:
- ChangeStatus tryInferNoAlias(Attributor &A) override {
- const auto *Arg = getAssociatedArgument();
- if (Arg->hasNoAliasAttr()) {
- addKnownBits(IS_NOALIAS);
- return ChangeStatus::UNCHANGED;
- }
-
- // noalias information is not provided, and cannot be inferred from
- // AANoAlias
- removeAssumedBits(IS_NOALIAS);
- return ChangeStatus::CHANGED;
- }
-
- ChangeStatus tryInferReadOnly(Attributor &A) override {
- const auto *Arg = getAssociatedArgument();
- if (Arg->onlyReadsMemory()) {
- addKnownBits(IS_READONLY);
- return ChangeStatus::UNCHANGED;
- }
-
- // readonly information is not provided, and cannot be inferred from
- // AAMemoryBehavior
- removeAssumedBits(IS_READONLY);
- return ChangeStatus::CHANGED;
- }
};
struct AAInvariantLoadPointerCallSiteArgument final
diff --git a/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll b/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll
index 9e58a35107491..f04ac4d73340f 100644
--- a/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll
@@ -10,7 +10,7 @@ define i8 @select_offsets_simplifiable_1(i1 %cnd1, i1 %cnd2) {
; CHECK-LABEL: define {{[^@]+}}@select_offsets_simplifiable_1
; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[BYTES:%.*]] = call noalias ptr @calloc(i64 noundef 1024, i64 noundef 1)
+; CHECK-NEXT: [[BYTES:%.*]] = call ptr @calloc(i64 noundef 1024, i64 noundef 1)
; CHECK-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 23
; CHECK-NEXT: store i8 23, ptr [[GEP23]], align 4
; CHECK-NEXT: [[GEP29:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 29
@@ -190,7 +190,7 @@ define i8 @select_offsets_not_simplifiable_3(i1 %cnd1, i1 %cnd2) {
; CHECK-LABEL: define {{[^@]+}}@select_offsets_not_simplifiable_3
; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[BYTES:%.*]] = call noalias ptr @calloc(i64 noundef 1024, i64 noundef 1)
+; CHECK-NEXT: [[BYTES:%.*]] = call ptr @calloc(i64 noundef 1024, i64 noundef 1)
; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7
; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 [[SEL1]]
@@ -214,7 +214,7 @@ define i8 @select_offsets_not_simplifiable_4(i1 %cnd1, i1 %cnd2) {
; CHECK-LABEL: define {{[^@]+}}@select_offsets_not_simplifiable_4
; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[BYTES:%.*]] = call noalias ptr @calloc(i64 noundef 1024, i64 noundef 1)
+; CHECK-NEXT: [[BYTES:%.*]] = call ptr @calloc(i64 noundef 1024, i64 noundef 1)
; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7
; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 [[SEL1]]
@@ -445,7 +445,7 @@ define i8 @phi_gep_not_simplifiable_2(i1 %cnd1, i1 %cnd2) {
; CHECK-LABEL: define {{[^@]+}}@phi_gep_not_simplifiable_2
; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[BYTES:%.*]] = call noalias ptr @calloc(i64 noundef 1024, i64 noundef 1)
+; CHECK-NEXT: [[BYTES:%.*]] = call ptr @calloc(i64 noundef 1024, i64 noundef 1)
; CHECK-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 23
; CHECK-NEXT: br i1 [[CND1]], label [[THEN:%.*]], label [[ELSE:%.*]]
; CHECK: then:
diff --git a/llvm/test/Transforms/Attributor/tag-invariant-loads.ll b/llvm/test/Transforms/Attributor/tag-invariant-loads.ll
index 6df07a0d68bee..02c304822bcb8 100644
--- a/llvm/test/Transforms/Attributor/tag-invariant-loads.ll
+++ b/llvm/test/Transforms/Attributor/tag-invariant-loads.ll
@@ -1,220 +1,253 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -passes=attributor %s -S | FileCheck %s
+; RUN: opt -mtriple=amdgcn-amd-hsa -passes=attributor %s -S | FileCheck %s --check-prefix=AMDGCN
- at G = global i32 zeroinitializer, align 4
+ at G = addrspace(1) global i32 zeroinitializer, align 4
+declare void @clobber(i32)
+declare ptr addrspace(1) @get_ptr()
+declare noalias ptr addrspace(1) @get_noalias_ptr()
-declare ptr @get_ptr()
-declare noalias ptr @get_noalias_ptr()
-
-define i32 @test_plain(ptr %ptr) {
-; CHECK-LABEL: define i32 @test_plain(
-; CHECK-SAME: ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
-; CHECK-NEXT: ret i32 [[VAL]]
+define void @test_nonkernel(ptr addrspace(1) noalias %ptr) {
+; AMDGCN-LABEL: define void @test_nonkernel(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) {
+; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT: ret void
;
- %val = load i32, ptr %ptr, align 4
- ret i32 %val
+ %val = load i32, ptr addrspace(1) %ptr, align 4
+ call void @clobber(i32 %val)
+ ret void
}
-define i32 @test_noalias_ptr(ptr noalias %ptr) {
-; CHECK-LABEL: define i32 @test_noalias_ptr(
-; CHECK-SAME: ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0:![0-9]+]]
-; CHECK-NEXT: ret i32 [[VAL]]
+define amdgpu_kernel void @test_plain(ptr addrspace(1) %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_plain(
+; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) {
+; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT: ret void
;
- %val = load i32, ptr %ptr, align 4
- ret i32 %val
+ %val = load i32, ptr addrspace(1) %ptr, align 4
+ call void @clobber(i32 %val)
+ ret void
}
-define i32 @test_swap(ptr noalias %ptr, i32 %write) {
-; CHECK-LABEL: define i32 @test_swap(
-; CHECK-SAME: ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[PTR:%.*]], i32 [[WRITE:%.*]]) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
-; CHECK-NEXT: store i32 [[WRITE]], ptr [[PTR]], align 4
-; CHECK-NEXT: ret i32 [[VAL]]
+define amdgpu_kernel void @test_noalias_ptr(ptr addrspace(1) noalias %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_noalias_ptr(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) {
+; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0:![0-9]+]]
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT: ret void
;
- %val = load i32, ptr %ptr, align 4
- store i32 %write, ptr %ptr, align 4
- ret i32 %val
+ %val = load i32, ptr addrspace(1) %ptr, align 4
+ call void @clobber(i32 %val)
+ ret void
}
-define i32 @test_volatile_load(ptr noalias %ptr) {
-; CHECK-LABEL: define i32 @test_volatile_load(
-; CHECK-SAME: ptr noalias nofree noundef align 4 [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
-; CHECK-NEXT: [[VAL:%.*]] = load volatile i32, ptr [[PTR]], align 4
-; CHECK-NEXT: ret i32 [[VAL]]
+define amdgpu_kernel void @test_swap(ptr addrspace(1) noalias %ptr, i32 inreg %swap) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_swap(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[SWAP:%.*]]) {
+; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT: store i32 [[SWAP]], ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT: ret void
;
- %val = load volatile i32, ptr %ptr, align 4
- ret i32 %val
+ %val = load i32, ptr addrspace(1) %ptr, align 4
+ store i32 %swap, ptr addrspace(1) %ptr, align 4
+ call void @clobber(i32 %val)
+ ret void
}
-define i32 @test_atomic_load(ptr noalias %ptr) {
-; CHECK-LABEL: define i32 @test_atomic_load(
-; CHECK-SAME: ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[VAL:%.*]] = load atomic i32, ptr [[PTR]] unordered, align 4
-; CHECK-NEXT: ret i32 [[VAL]]
+define amdgpu_kernel void @test_volatile(ptr addrspace(1) noalias %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_volatile(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 [[PTR:%.*]]) {
+; AMDGCN-NEXT: [[VAL:%.*]] = load volatile i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT: ret void
;
- %val = load atomic i32, ptr %ptr unordered, align 4
- ret i32 %val
+ %val = load volatile i32, ptr addrspace(1) %ptr, align 4
+ call void @clobber(i32 %val)
+ ret void
}
-define i32 @test_atomic_volatile_load(ptr noalias %ptr) {
-; CHECK-LABEL: define i32 @test_atomic_volatile_load(
-; CHECK-SAME: ptr noalias nofree noundef align 4 [[PTR:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT: [[VAL:%.*]] = load atomic volatile i32, ptr [[PTR]] unordered, align 4
-; CHECK-NEXT: ret i32 [[VAL]]
+define amdgpu_kernel void @test_atomic(ptr addrspace(1) noalias %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_atomic(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) {
+; AMDGCN-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] unordered, align 4
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT: ret void
;
- %val = load atomic volatile i32, ptr %ptr unordered, align 4
- ret i32 %val
+ %val = load atomic i32, ptr addrspace(1) %ptr unordered, align 4
+ call void @clobber(i32 %val)
+ ret void
}
-define i32 @test_global() {
-; CHECK-LABEL: define i32 @test_global(
-; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
-; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr @G, align 4
-; CHECK-NEXT: ret i32 [[VAL]]
+define amdgpu_kernel void @test_global() {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_global() {
+; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) @G, align 4
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT: ret void
;
- %val = load i32, ptr @G, align 4
- ret i32 %val
+ %val = load i32, ptr addrspace(1) @G, align 4
+ call void @clobber(i32 %val)
+ ret void
}
-define internal i32 @test_internal_noalias_load(ptr %ptr) {
-; CHECK-LABEL: define internal i32 @test_internal_noalias_load(
-; CHECK-SAME: ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0]]
-; CHECK-NEXT: ret i32 [[VAL]]
+define internal i32 @test_internal_noalias_load(ptr addrspace(1) %ptr) {
+; AMDGCN-LABEL: define internal i32 @test_internal_noalias_load(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
+; AMDGCN-NEXT: ret i32 [[VAL]]
;
- %val = load i32, ptr %ptr, align 4
- ret i32 %val
+ %val = load i32, ptr addrspace(1) %ptr, align 4
+ ret i32 %val
+}
+
+define amdgpu_kernel void @test_call_internal_noalias(ptr addrspace(1) noalias %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal_noalias(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) {
+; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_noalias_load(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR1:[0-9]+]]
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT: ret void
+;
+ %val = call i32 @test_internal_noalias_load(ptr addrspace(1) %ptr)
+ call void @clobber(i32 %val)
+ ret void
}
-define i32 @test_call_internal_noalias(ptr noalias %ptr) {
-; CHECK-LABEL: define i32 @test_call_internal_noalias(
-; CHECK-SAME: ptr noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[VAL:%.*]] = call i32 @test_internal_noalias_load(ptr noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR4:[0-9]+]]
-; CHECK-NEXT: ret i32 [[VAL]]
-;
- %val = call i32 @test_internal_noalias_load(ptr %ptr)
- ret i32 %val
-}
+define internal i32 @test_internal_load(ptr addrspace(1) %ptr) {
+; AMDGCN-LABEL: define internal i32 @test_internal_load(
+; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR0]] {
+; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT: ret i32 [[VAL]]
+;
+ %val = load i32, ptr addrspace(1) %ptr, align 4
+ ret i32 %val
+}
-define internal i32 @test_internal_load(ptr %ptr) {
-; CHECK-LABEL: define internal i32 @test_internal_load(
-; CHECK-SAME: ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[PTR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
-; CHECK-NEXT: ret i32 [[VAL]]
-;
- %val = load i32, ptr %ptr, align 4
- ret i32 %val
-}
+define amdgpu_kernel void @test_call_internal(ptr addrspace(1) %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal(
+; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) {
+; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_load(ptr addrspace(1) nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR1]]
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT: ret void
+;
+ %val = call i32 @test_internal_load(ptr addrspace(1) %ptr)
+ call void @clobber(i32 %val)
+ ret void
+}
-define i32 @test_call_internal(ptr %ptr) {
-; CHECK-LABEL: define i32 @test_call_internal(
-; CHECK-SAME: ptr nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[VAL:%.*]] = call i32 @test_internal_load(ptr nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR4]]
-; CHECK-NEXT: ret i32 [[VAL]]
-;
- %val = call i32 @test_internal_load(ptr %ptr)
- ret i32 %val
+define amdgpu_kernel void @test_call_ptr() {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_call_ptr() {
+; AMDGCN-NEXT: [[PTR:%.*]] = call ptr addrspace(1) @get_ptr()
+; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT: ret void
+;
+ %ptr = call ptr addrspace(1) @get_ptr()
+ %val = load i32, ptr addrspace(1) %ptr, align 4
+ call void @clobber(i32 %val)
+ ret void
}
-define i32 @test_call_ptr() {
-; CHECK-LABEL: define i32 @test_call_ptr() {
-; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_ptr()
-; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
-; CHECK-NEXT: ret i32 [[VAL]]
+define amdgpu_kernel void @test_call_noalias_ptr() {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_call_noalias_ptr() {
+; AMDGCN-NEXT: [[PTR:%.*]] = call ptr addrspace(1) @get_noalias_ptr()
+; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT: ret void
;
- %ptr = call ptr @get_ptr()
- %val = load i32, ptr %ptr, align 4
- ret i32 %val
-}
-
-define i32 @test_call_noalias_ptr() {
-; CHECK-LABEL: define i32 @test_call_noalias_ptr() {
-; CHECK-NEXT: [[PTR:%.*]] = call noalias ptr @get_noalias_ptr()
-; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0]]
-; CHECK-NEXT: ret i32 [[VAL]]
-;
- %ptr = call ptr @get_noalias_ptr()
- %val = load i32, ptr %ptr, align 4
- ret i32 %val
+ %ptr = call ptr addrspace(1) @get_noalias_ptr()
+ %val = load i32, ptr addrspace(1) %ptr, align 4
+ call void @clobber(i32 %val)
+ ret void
}
-define i32 @test_selected_load(i1 %cond, ptr noalias %ptr.true, ptr noalias %ptr.false) {
-; CHECK-LABEL: define i32 @test_selected_load(
-; CHECK-SAME: i1 [[COND:%.*]], ptr noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr [[PTR_TRUE]], ptr [[PTR_FALSE]]
-; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0]]
-; CHECK-NEXT: ret i32 [[VAL]]
-;
- %ptr = select i1 %cond, ptr %ptr.true, ptr %ptr.false
- %val = load i32, ptr %ptr, align 4
- ret i32 %val
+define amdgpu_kernel void @test_selected_load(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load(
+; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) {
+; AMDGCN-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]]
+; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT: ret void
+;
+ %ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false
+ %val = load i32, ptr addrspace(1) %ptr, align 4
+ call void @clobber(i32 %val)
+ ret void
}
-define i32 @test_selected_load_partial_noalias(i1 %cond, ptr noalias %ptr.true, ptr %ptr.false) {
-; CHECK-LABEL: define i32 @test_selected_load_partial_noalias(
-; CHECK-SAME: i1 [[COND:%.*]], ptr noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr [[PTR_TRUE]], ptr [[PTR_FALSE]]
-; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
-; CHECK-NEXT: ret i32 [[VAL]]
+define amdgpu_kernel void @test_selected_load_partial_noalias(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) %ptr.false) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load_partial_noalias(
+; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) {
+; AMDGCN-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]]
+; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT: ret void
;
- %ptr = select i1 %cond, ptr %ptr.true, ptr %ptr.false
- %val = load i32, ptr %ptr, align 4
- ret i32 %val
+ %ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false
+ %val = load i32, ptr addrspace(1) %ptr, align 4
+ call void @clobber(i32 %val)
+ ret void
}
-define i32 @test_branch_load(i1 %cond, ptr noalias %ptr.true, ptr noalias %ptr.false) {
-; CHECK-LABEL: define i32 @test_branch_load(
-; CHECK-SAME: i1 noundef [[COND:%.*]], ptr noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]]
-; CHECK: [[TRUE]]:
-; CHECK-NEXT: br label %[[FINISH:.*]]
-; CHECK: [[FALSE]]:
-; CHECK-NEXT: br label %[[FINISH]]
-; CHECK: [[FINISH]]:
-; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ]
-; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4, !invariant.load [[META0]]
-; CHECK-NEXT: ret i32 [[VAL]]
+define amdgpu_kernel void @test_branch_load(i1 %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_branch_load(
+; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) {
+; AMDGCN-NEXT: [[ENTRY:.*:]]
+; AMDGCN-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]]
+; AMDGCN: [[TRUE]]:
+; AMDGCN-NEXT: call void @clobber(i32 noundef 1)
+; AMDGCN-NEXT: br label %[[FINISH:.*]]
+; AMDGCN: [[FALSE]]:
+; AMDGCN-NEXT: br label %[[FINISH]]
+; AMDGCN: [[FINISH]]:
+; AMDGCN-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ]
+; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT: ret void
;
entry:
br i1 %cond, label %true, label %false
true:
+ call void @clobber(i32 1)
br label %finish
false:
br label %finish
finish:
- %ptr = phi ptr [ %ptr.true, %true ], [ %ptr.false, %false ]
- %val = load i32, ptr %ptr, align 4
- ret i32 %val
-}
-
-define i32 @test_branch_load_partial_noalias(i1 %cond, ptr noalias %ptr.true, ptr %ptr.false) {
-; CHECK-LABEL: define i32 @test_branch_load_partial_noalias(
-; CHECK-SAME: i1 noundef [[COND:%.*]], ptr noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]]
-; CHECK: [[TRUE]]:
-; CHECK-NEXT: br label %[[FINISH:.*]]
-; CHECK: [[FALSE]]:
-; CHECK-NEXT: br label %[[FINISH]]
-; CHECK: [[FINISH]]:
-; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ]
-; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
-; CHECK-NEXT: ret i32 [[VAL]]
+ %ptr = phi ptr addrspace(1) [ %ptr.true, %true ], [ %ptr.false, %false ]
+ %val = load i32, ptr addrspace(1) %ptr, align 4
+ call void @clobber(i32 %val)
+ ret void
+}
+
+define amdgpu_kernel void @test_branch_load_partial_noalias(i1 %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) %ptr.false) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_branch_load_partial_noalias(
+; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) {
+; AMDGCN-NEXT: [[ENTRY:.*:]]
+; AMDGCN-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]]
+; AMDGCN: [[TRUE]]:
+; AMDGCN-NEXT: call void @clobber(i32 noundef 1)
+; AMDGCN-NEXT: br label %[[FINISH:.*]]
+; AMDGCN: [[FALSE]]:
+; AMDGCN-NEXT: br label %[[FINISH]]
+; AMDGCN: [[FINISH]]:
+; AMDGCN-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ]
+; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT: ret void
;
entry:
br i1 %cond, label %true, label %false
true:
+ call void @clobber(i32 1)
br label %finish
false:
br label %finish
finish:
- %ptr = phi ptr [ %ptr.true, %true ], [ %ptr.false, %false ]
- %val = load i32, ptr %ptr, align 4
- ret i32 %val
+ %ptr = phi ptr addrspace(1) [ %ptr.true, %true ], [ %ptr.false, %false ]
+ %val = load i32, ptr addrspace(1) %ptr, align 4
+ call void @clobber(i32 %val)
+ ret void
}
;.
-; CHECK: [[META0]] = !{}
+; AMDGCN: [[META0]] = !{}
;.
>From ef97544e9bc31e61c84e7d1e8b044ac3a61ca164 Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <zgoldtho at ualberta.ca>
Date: Thu, 29 May 2025 16:02:57 -0500
Subject: [PATCH 3/7] Added guards for side-effects on loads.
"Side effects" include volatile loads and atomic loads that are at least
monotonic.
---
.../Transforms/IPO/AttributorAttributes.cpp | 79 +++++-----
.../Attributor/tag-invariant-loads.ll | 139 +++++++++++-------
2 files changed, 135 insertions(+), 83 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index dec36b3e7dcb3..b178cc5951e3d 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12542,39 +12542,44 @@ struct AAInvariantLoadPointerImpl
: public StateWrapper<BitIntegerState<uint8_t, 7>, AAInvariantLoadPointer,
uint8_t> {
// load invariance is implied by, but not equivalent to IS_NOALIAS |
- // IS_READONLY, as load invariance is also implied by all underlying objects
+ // IS_NOEFFECT, as load invariance is also implied by all underlying objects
// being load invariant.
//
- // IS_INVARIANT is set to indicate that the contents of the pointer are
- // *known* to be invariant.
+ // IS_KNOWN_INVARIANT is set to indicate that the contents of the pointer are
+ // *known* to be invariant, and is therefore a pessimistic bit.
enum {
- IS_INVARIANT = 1 << 0,
+ IS_KNOWN_INVARIANT = 1 << 0,
IS_NOALIAS = 1 << 1,
- IS_READONLY = 1 << 2,
+ IS_NOEFFECT = 1 << 2,
+
+ IS_IMPLIED_INVARIANT = IS_NOALIAS | IS_NOEFFECT,
};
- static_assert(getBestState() == (IS_INVARIANT | IS_NOALIAS | IS_READONLY),
+ static_assert(getBestState() == (IS_KNOWN_INVARIANT | IS_IMPLIED_INVARIANT),
"Unexpected best state!");
using Base = StateWrapper<BitIntegerState<uint8_t, 7>, AAInvariantLoadPointer,
uint8_t>;
- // the BitIntegerState is optimistic about noalias and readonly, but
- // pessimistic about invariance
+ // the BitIntegerState is optimistic about IS_NOALIAS and IS_NOEFFECT, but
+ // pessimistic about IS_KNOWN_INVARIANT
AAInvariantLoadPointerImpl(const IRPosition &IRP, Attributor &A)
- : Base(IRP, IS_NOALIAS | IS_READONLY) {}
+ : Base(IRP, IS_IMPLIED_INVARIANT) {}
void initialize(Attributor &A) final {
- // conservatively assume that the pointer's contents are not invariant,
- // until proven otherwise.
- removeAssumedBits(IS_INVARIANT);
+ removeAssumedBits(IS_KNOWN_INVARIANT);
}
bool isKnownInvariant() const final {
- return isKnown(IS_INVARIANT) || isKnown(IS_NOALIAS | IS_READONLY);
+ return isKnown(IS_KNOWN_INVARIANT) || isKnown(IS_IMPLIED_INVARIANT);
}
bool isAssumedInvariant() const final {
- return isAssumed(IS_INVARIANT) || isAssumed(IS_NOALIAS | IS_READONLY);
+ if (isAssumed(IS_KNOWN_INVARIANT) || isAssumed(IS_IMPLIED_INVARIANT))
+ return true;
+ // if the function is callable, optimistically assume that invariance can be
+ // inferred from the caller
+ const auto *F = getAssociatedFunction();
+ return F && isCallableCC(F->getCallingConv());
}
ChangeStatus updateImpl(Attributor &A) override {
@@ -12583,8 +12588,12 @@ struct AAInvariantLoadPointerImpl
ChangeStatus Changed = ChangeStatus::UNCHANGED;
- Changed |= updateNoAlias(A);
- Changed |= updateReadOnly(A);
+ Changed |= checkNoAlias(A);
+ Changed |= checkNoEffect(A);
+
+ // try to infer invariance from underlying objects
+ const auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(
+ getIRPosition(), this, DepClassTy::REQUIRED);
bool UsedAssumedInformation = false;
const auto IsInvariantLoadIfPointer = [&](const Value &V) {
@@ -12601,16 +12610,12 @@ struct AAInvariantLoadPointerImpl
UsedAssumedInformation = true;
return true;
};
-
- const auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(
- getIRPosition(), this, DepClassTy::REQUIRED);
-
if (!AUO->forallUnderlyingObjects(IsInvariantLoadIfPointer))
return indicatePessimisticFixpoint();
if (!UsedAssumedInformation) {
// pointer is known (not assumed) to be invariant
- addKnownBits(IS_INVARIANT);
+ addKnownBits(IS_KNOWN_INVARIANT);
return indicateOptimisticFixpoint() | Changed;
}
@@ -12639,8 +12644,6 @@ struct AAInvariantLoadPointerImpl
return true;
if (auto *LI = dyn_cast<LoadInst>(I)) {
- if (LI->isVolatile() || LI->isAtomic())
- return true;
LI->setMetadata(LLVMContext::MD_invariant_load,
MDNode::get(LI->getContext(), {}));
@@ -12664,8 +12667,8 @@ struct AAInvariantLoadPointerImpl
/// See AbstractAttribute::trackStatistics().
void trackStatistics() const override {}
-protected:
- ChangeStatus updateNoAlias(Attributor &A) {
+private:
+ ChangeStatus checkNoAlias(Attributor &A) {
if (isKnown(IS_NOALIAS) || !isAssumed(IS_NOALIAS))
return ChangeStatus::UNCHANGED;
@@ -12710,8 +12713,8 @@ struct AAInvariantLoadPointerImpl
return ChangeStatus::UNCHANGED;
}
- ChangeStatus updateReadOnly(Attributor &A) {
- if (isKnown(IS_READONLY) || !isAssumed(IS_READONLY))
+ ChangeStatus checkNoEffect(Attributor &A) {
+ if (isKnown(IS_NOEFFECT) || !isAssumed(IS_NOEFFECT))
return ChangeStatus::UNCHANGED;
const auto *F = getAssociatedFunction();
@@ -12720,8 +12723,18 @@ struct AAInvariantLoadPointerImpl
return ChangeStatus::UNCHANGED;
if (isCallableCC(F->getCallingConv())) {
- // readonly attribute is only useful if applicable program-wide
- removeAssumedBits(IS_READONLY);
+ // effects cannot be tracked outside of the function call;
+ // conservatively assume pointer has effectful uses
+ removeAssumedBits(IS_NOEFFECT);
+ return ChangeStatus::CHANGED;
+ }
+
+ const auto HasNoSideEffects = [](const Use &U, bool &) {
+ const auto *I = dyn_cast<LoadInst>(U.getUser());
+ return !I || !I->mayHaveSideEffects();
+ };
+ if (!A.checkForAllUses(HasNoSideEffects, *this, getAssociatedValue())) {
+ removeAssumedBits(IS_NOEFFECT);
return ChangeStatus::CHANGED;
}
@@ -12729,12 +12742,12 @@ struct AAInvariantLoadPointerImpl
if (const auto *AMemoryBehavior = A.getOrCreateAAFor<AAMemoryBehavior>(
getIRPosition(), this, DepClassTy::REQUIRED)) {
if (!AMemoryBehavior->isAssumedReadOnly()) {
- removeAssumedBits(IS_READONLY);
+ removeAssumedBits(IS_NOEFFECT);
return ChangeStatus::CHANGED;
}
if (AMemoryBehavior->isKnownReadOnly()) {
- addKnownBits(IS_READONLY);
+ addKnownBits(IS_NOEFFECT);
return ChangeStatus::UNCHANGED;
}
@@ -12743,13 +12756,13 @@ struct AAInvariantLoadPointerImpl
if (const auto *Arg = getAssociatedArgument()) {
if (Arg->onlyReadsMemory()) {
- addKnownBits(IS_READONLY);
+ addKnownBits(IS_NOEFFECT);
return ChangeStatus::UNCHANGED;
}
// readonly information is not provided, and cannot be inferred from
// AAMemoryBehavior
- removeAssumedBits(IS_READONLY);
+ removeAssumedBits(IS_NOEFFECT);
return ChangeStatus::CHANGED;
}
diff --git a/llvm/test/Transforms/Attributor/tag-invariant-loads.ll b/llvm/test/Transforms/Attributor/tag-invariant-loads.ll
index 02c304822bcb8..b73e6ffafbe4a 100644
--- a/llvm/test/Transforms/Attributor/tag-invariant-loads.ll
+++ b/llvm/test/Transforms/Attributor/tag-invariant-loads.ll
@@ -1,40 +1,42 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -mtriple=amdgcn-amd-hsa -passes=attributor %s -S | FileCheck %s --check-prefix=AMDGCN
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=attributor %s -S | FileCheck %s --check-prefix=AMDGCN
@G = addrspace(1) global i32 zeroinitializer, align 4
-declare void @clobber(i32)
-declare ptr addrspace(1) @get_ptr()
-declare noalias ptr addrspace(1) @get_noalias_ptr()
+declare void @clobber(i32) #0
+declare ptr addrspace(1) @get_ptr() #0
+attributes #0 = { nofree norecurse nosync nounwind willreturn }
define void @test_nonkernel(ptr addrspace(1) noalias %ptr) {
; AMDGCN-LABEL: define void @test_nonkernel(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4:[0-9]+]]
; AMDGCN-NEXT: ret void
;
%val = load i32, ptr addrspace(1) %ptr, align 4
+ ;; may not be !invariant.load, as the caller may modify %ptr
call void @clobber(i32 %val)
ret void
}
define amdgpu_kernel void @test_plain(ptr addrspace(1) %ptr) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_plain(
-; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) {
+; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
; AMDGCN-NEXT: ret void
;
%val = load i32, ptr addrspace(1) %ptr, align 4
+ ;; may not be !invariant.load, as %ptr may alias a pointer in @clobber
call void @clobber(i32 %val)
ret void
}
define amdgpu_kernel void @test_noalias_ptr(ptr addrspace(1) noalias %ptr) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_noalias_ptr(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0:![0-9]+]]
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
; AMDGCN-NEXT: ret void
;
%val = load i32, ptr addrspace(1) %ptr, align 4
@@ -44,13 +46,14 @@ define amdgpu_kernel void @test_noalias_ptr(ptr addrspace(1) noalias %ptr) {
define amdgpu_kernel void @test_swap(ptr addrspace(1) noalias %ptr, i32 inreg %swap) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_swap(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[SWAP:%.*]]) {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[SWAP:%.*]]) #[[ATTR1]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; AMDGCN-NEXT: store i32 [[SWAP]], ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
; AMDGCN-NEXT: ret void
;
%val = load i32, ptr addrspace(1) %ptr, align 4
+ ;; cannot be !invariant.load due to the write to %ptr
store i32 %swap, ptr addrspace(1) %ptr, align 4
call void @clobber(i32 %val)
ret void
@@ -58,21 +61,22 @@ define amdgpu_kernel void @test_swap(ptr addrspace(1) noalias %ptr, i32 inreg %s
define amdgpu_kernel void @test_volatile(ptr addrspace(1) noalias %ptr) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_volatile(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 [[PTR:%.*]]) {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load volatile i32, ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
; AMDGCN-NEXT: ret void
;
%val = load volatile i32, ptr addrspace(1) %ptr, align 4
+ ;; volatile loads cannot be !invariant.load
call void @clobber(i32 %val)
ret void
}
-define amdgpu_kernel void @test_atomic(ptr addrspace(1) noalias %ptr) {
-; AMDGCN-LABEL: define amdgpu_kernel void @test_atomic(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) {
-; AMDGCN-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] unordered, align 4
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+define amdgpu_kernel void @test_unordered(ptr addrspace(1) noalias %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_unordered(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1]] {
+; AMDGCN-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] unordered, align 4, !invariant.load [[META0]]
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
; AMDGCN-NEXT: ret void
;
%val = load atomic i32, ptr addrspace(1) %ptr unordered, align 4
@@ -80,32 +84,48 @@ define amdgpu_kernel void @test_atomic(ptr addrspace(1) noalias %ptr) {
ret void
}
+define amdgpu_kernel void @test_monotonic(ptr addrspace(1) noalias %ptr) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_monotonic(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1]] {
+; AMDGCN-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] monotonic, align 4
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT: ret void
+;
+ %val = load atomic i32, ptr addrspace(1) %ptr monotonic, align 4
+ ;; atomic loads with ordering guarantees may have side effects
+ call void @clobber(i32 %val)
+ ret void
+}
+
define amdgpu_kernel void @test_global() {
-; AMDGCN-LABEL: define amdgpu_kernel void @test_global() {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_global(
+; AMDGCN-SAME: ) #[[ATTR1]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) @G, align 4
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
; AMDGCN-NEXT: ret void
;
%val = load i32, ptr addrspace(1) @G, align 4
+ ;; is not an !invariant.load as global variables may change
call void @clobber(i32 %val)
ret void
}
define internal i32 @test_internal_noalias_load(ptr addrspace(1) %ptr) {
; AMDGCN-LABEL: define internal i32 @test_internal_noalias_load(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR3:[0-9]+]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
; AMDGCN-NEXT: ret i32 [[VAL]]
;
%val = load i32, ptr addrspace(1) %ptr, align 4
+ ;; is an !invariant.load due to its only caller @test_call_internal_noalias
ret i32 %val
}
define amdgpu_kernel void @test_call_internal_noalias(ptr addrspace(1) noalias %ptr) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal_noalias(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) {
-; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_noalias_load(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR1:[0-9]+]]
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR1]] {
+; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_noalias_load(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR5:[0-9]+]]
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
; AMDGCN-NEXT: ret void
;
%val = call i32 @test_internal_noalias_load(ptr addrspace(1) %ptr)
@@ -115,19 +135,20 @@ define amdgpu_kernel void @test_call_internal_noalias(ptr addrspace(1) noalias %
define internal i32 @test_internal_load(ptr addrspace(1) %ptr) {
; AMDGCN-LABEL: define internal i32 @test_internal_load(
-; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR0]] {
+; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR3]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; AMDGCN-NEXT: ret i32 [[VAL]]
;
%val = load i32, ptr addrspace(1) %ptr, align 4
+ ;; may not be an !invariant.load since the pointer in @test_call_internal may alias
ret i32 %val
}
define amdgpu_kernel void @test_call_internal(ptr addrspace(1) %ptr) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal(
-; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) {
-; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_load(ptr addrspace(1) nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR1]]
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR1]] {
+; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_load(ptr addrspace(1) nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR5]]
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
; AMDGCN-NEXT: ret void
;
%val = call i32 @test_internal_load(ptr addrspace(1) %ptr)
@@ -135,74 +156,90 @@ define amdgpu_kernel void @test_call_internal(ptr addrspace(1) %ptr) {
ret void
}
-define amdgpu_kernel void @test_call_ptr() {
-; AMDGCN-LABEL: define amdgpu_kernel void @test_call_ptr() {
-; AMDGCN-NEXT: [[PTR:%.*]] = call ptr addrspace(1) @get_ptr()
+define internal i32 @test_internal_written(ptr addrspace(1) %ptr) {
+; AMDGCN-LABEL: define internal i32 @test_internal_written(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR3]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
-; AMDGCN-NEXT: ret void
+; AMDGCN-NEXT: ret i32 [[VAL]]
;
- %ptr = call ptr addrspace(1) @get_ptr()
%val = load i32, ptr addrspace(1) %ptr, align 4
+ ;; cannot be an !invariant.load because of the write in caller @test_call_internal_written
+ ret i32 %val
+}
+
+define amdgpu_kernel void @test_call_internal_written(ptr addrspace(1) noalias %ptr, i32 inreg %x) {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal_written(
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[X:%.*]]) #[[ATTR1]] {
+; AMDGCN-NEXT: store i32 [[X]], ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_written(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR]]) #[[ATTR5]]
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT: ret void
+;
+ store i32 %x, ptr addrspace(1) %ptr
+ %val = call i32 @test_internal_written(ptr addrspace(1) %ptr)
call void @clobber(i32 %val)
ret void
}
-define amdgpu_kernel void @test_call_noalias_ptr() {
-; AMDGCN-LABEL: define amdgpu_kernel void @test_call_noalias_ptr() {
-; AMDGCN-NEXT: [[PTR:%.*]] = call ptr addrspace(1) @get_noalias_ptr()
+define amdgpu_kernel void @test_call_ptr() {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_call_ptr(
+; AMDGCN-SAME: ) #[[ATTR1]] {
+; AMDGCN-NEXT: [[PTR:%.*]] = call align 4 ptr addrspace(1) @get_ptr() #[[ATTR4]]
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
; AMDGCN-NEXT: ret void
;
- %ptr = call ptr addrspace(1) @get_noalias_ptr()
+ %ptr = call ptr addrspace(1) @get_ptr()
%val = load i32, ptr addrspace(1) %ptr, align 4
+ ;; may not be an !invariant.load since %ptr may alias
call void @clobber(i32 %val)
ret void
}
define amdgpu_kernel void @test_selected_load(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load(
-; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) {
+; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR1]] {
; AMDGCN-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]]
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
; AMDGCN-NEXT: ret void
;
%ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false
%val = load i32, ptr addrspace(1) %ptr, align 4
+ ;; either pointer yields an !invariant.load
call void @clobber(i32 %val)
ret void
}
define amdgpu_kernel void @test_selected_load_partial_noalias(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) %ptr.false) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load_partial_noalias(
-; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) {
+; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR1]] {
; AMDGCN-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]]
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
; AMDGCN-NEXT: ret void
;
%ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false
%val = load i32, ptr addrspace(1) %ptr, align 4
+ ;; %ptr.false may alias, so no !invariant.load
call void @clobber(i32 %val)
ret void
}
define amdgpu_kernel void @test_branch_load(i1 %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_branch_load(
-; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) {
+; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR1]] {
; AMDGCN-NEXT: [[ENTRY:.*:]]
; AMDGCN-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; AMDGCN: [[TRUE]]:
-; AMDGCN-NEXT: call void @clobber(i32 noundef 1)
+; AMDGCN-NEXT: call void @clobber(i32 noundef 1) #[[ATTR4]]
; AMDGCN-NEXT: br label %[[FINISH:.*]]
; AMDGCN: [[FALSE]]:
; AMDGCN-NEXT: br label %[[FINISH]]
; AMDGCN: [[FINISH]]:
; AMDGCN-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ]
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
; AMDGCN-NEXT: ret void
;
entry:
@@ -215,24 +252,25 @@ false:
finish:
%ptr = phi ptr addrspace(1) [ %ptr.true, %true ], [ %ptr.false, %false ]
%val = load i32, ptr addrspace(1) %ptr, align 4
+ ;; either pointer yields an !invariant.load
call void @clobber(i32 %val)
ret void
}
define amdgpu_kernel void @test_branch_load_partial_noalias(i1 %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) %ptr.false) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_branch_load_partial_noalias(
-; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) {
+; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR1]] {
; AMDGCN-NEXT: [[ENTRY:.*:]]
; AMDGCN-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; AMDGCN: [[TRUE]]:
-; AMDGCN-NEXT: call void @clobber(i32 noundef 1)
+; AMDGCN-NEXT: call void @clobber(i32 noundef 1) #[[ATTR4]]
; AMDGCN-NEXT: br label %[[FINISH:.*]]
; AMDGCN: [[FALSE]]:
; AMDGCN-NEXT: br label %[[FINISH]]
; AMDGCN: [[FINISH]]:
; AMDGCN-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ]
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]])
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
; AMDGCN-NEXT: ret void
;
entry:
@@ -245,6 +283,7 @@ false:
finish:
%ptr = phi ptr addrspace(1) [ %ptr.true, %true ], [ %ptr.false, %false ]
%val = load i32, ptr addrspace(1) %ptr, align 4
+ ;; %ptr.false may alias, so no !invariant.load
call void @clobber(i32 %val)
ret void
}
>From fe750fd8a26093fedef3c791e264f3fa1f1415df Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <zgoldtho at ualberta.ca>
Date: Mon, 2 Jun 2025 21:23:47 -0500
Subject: [PATCH 4/7] Corrected and refactored attributor logic.
---
llvm/include/llvm/Transforms/IPO/Attributor.h | 2 +
.../Transforms/IPO/AttributorAttributes.cpp | 172 +++++++++++-------
.../Attributor/tag-invariant-loads.ll | 118 +++++++-----
3 files changed, 179 insertions(+), 113 deletions(-)
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 38996bb051328..55be0838d464a 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -6308,9 +6308,11 @@ struct AAInvariantLoadPointer : public AbstractAttribute {
/// Return true if the pointer's contents are known to remain invariant.
virtual bool isKnownInvariant() const = 0;
+ virtual bool isKnownLocallyInvariant() const = 0;
/// Return true if the pointer's contents are assumed to remain invariant.
virtual bool isAssumedInvariant() const = 0;
+ virtual bool isAssumedLocallyInvariant() const = 0;
/// See AbstractAttribute::getName().
StringRef getName() const override { return "AAInvariantLoadPointer"; }
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index b178cc5951e3d..cfe7611276feb 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12539,47 +12539,49 @@ struct AAIndirectCallInfoCallSite : public AAIndirectCallInfo {
namespace {
struct AAInvariantLoadPointerImpl
- : public StateWrapper<BitIntegerState<uint8_t, 7>, AAInvariantLoadPointer,
- uint8_t> {
- // load invariance is implied by, but not equivalent to IS_NOALIAS |
- // IS_NOEFFECT, as load invariance is also implied by all underlying objects
- // being load invariant.
- //
- // IS_KNOWN_INVARIANT is set to indicate that the contents of the pointer are
- // *known* to be invariant, and is therefore a pessimistic bit.
- enum {
- IS_KNOWN_INVARIANT = 1 << 0,
- IS_NOALIAS = 1 << 1,
- IS_NOEFFECT = 1 << 2,
+ : public StateWrapper<BitIntegerState<uint8_t, 15>,
+ AAInvariantLoadPointer> {
- IS_IMPLIED_INVARIANT = IS_NOALIAS | IS_NOEFFECT,
+ enum {
+ // pointer does not alias within the bounds of the function
+ IS_NOALIAS = 1 << 0,
+ // pointer is not involved in any effectful instructions within the bounds
+ // of the function
+ IS_NOEFFECT = 1 << 1,
+ // loads are invariant within the bounds of the function
+ IS_LOCALLY_INVARIANT = 1 << 2,
+ // memory lifetime is constrained within the bounds of the function
+ IS_LOCALLY_CONSTRAINED = 1 << 3,
+
+ IS_BEST_STATE = IS_NOALIAS | IS_NOEFFECT | IS_LOCALLY_INVARIANT |
+ IS_LOCALLY_CONSTRAINED,
};
- static_assert(getBestState() == (IS_KNOWN_INVARIANT | IS_IMPLIED_INVARIANT),
- "Unexpected best state!");
+ static_assert(getBestState() == IS_BEST_STATE, "Unexpected best state");
- using Base = StateWrapper<BitIntegerState<uint8_t, 7>, AAInvariantLoadPointer,
- uint8_t>;
+ using Base =
+ StateWrapper<BitIntegerState<uint8_t, 15>, AAInvariantLoadPointer>;
// the BitIntegerState is optimistic about IS_NOALIAS and IS_NOEFFECT, but
// pessimistic about IS_KNOWN_INVARIANT
AAInvariantLoadPointerImpl(const IRPosition &IRP, Attributor &A)
- : Base(IRP, IS_IMPLIED_INVARIANT) {}
-
- void initialize(Attributor &A) final {
- removeAssumedBits(IS_KNOWN_INVARIANT);
- }
+ : Base(IRP) {}
bool isKnownInvariant() const final {
- return isKnown(IS_KNOWN_INVARIANT) || isKnown(IS_IMPLIED_INVARIANT);
+ return isKnownLocallyInvariant() && isKnown(IS_LOCALLY_CONSTRAINED);
+ }
+ bool isKnownLocallyInvariant() const final {
+ if (isKnown(IS_LOCALLY_INVARIANT))
+ return true;
+ return isKnown(IS_NOALIAS | IS_NOEFFECT);
}
bool isAssumedInvariant() const final {
- if (isAssumed(IS_KNOWN_INVARIANT) || isAssumed(IS_IMPLIED_INVARIANT))
+ return isAssumedLocallyInvariant() && isAssumed(IS_LOCALLY_CONSTRAINED);
+ }
+ bool isAssumedLocallyInvariant() const final {
+ if (isAssumed(IS_LOCALLY_INVARIANT))
return true;
- // if the function is callable, optimistically assume that invariance can be
- // inferred from the caller
- const auto *F = getAssociatedFunction();
- return F && isCallableCC(F->getCallingConv());
+ return isAssumed(IS_NOALIAS | IS_NOEFFECT);
}
ChangeStatus updateImpl(Attributor &A) override {
@@ -12589,6 +12591,9 @@ struct AAInvariantLoadPointerImpl
ChangeStatus Changed = ChangeStatus::UNCHANGED;
Changed |= checkNoAlias(A);
+ if (requiresNoAlias() && !isAssumed(IS_NOALIAS))
+ return indicatePessimisticFixpoint();
+
Changed |= checkNoEffect(A);
// try to infer invariance from underlying objects
@@ -12602,9 +12607,9 @@ struct AAInvariantLoadPointerImpl
const auto *IsInvariantLoadPointer =
A.getOrCreateAAFor<AAInvariantLoadPointer>(IRPosition::value(V), this,
DepClassTy::REQUIRED);
- if (IsInvariantLoadPointer->isKnownInvariant())
+ if (IsInvariantLoadPointer->isKnownLocallyInvariant())
return true;
- if (!IsInvariantLoadPointer->isAssumedInvariant())
+ if (!IsInvariantLoadPointer->isAssumedLocallyInvariant())
return false;
UsedAssumedInformation = true;
@@ -12614,9 +12619,9 @@ struct AAInvariantLoadPointerImpl
return indicatePessimisticFixpoint();
if (!UsedAssumedInformation) {
- // pointer is known (not assumed) to be invariant
- addKnownBits(IS_KNOWN_INVARIANT);
- return indicateOptimisticFixpoint() | Changed;
+ // pointer is known (not assumed) to be locally invariant
+ addKnownBits(IS_LOCALLY_INVARIANT);
+ return Changed;
}
return Changed;
@@ -12658,28 +12663,31 @@ struct AAInvariantLoadPointerImpl
/// See AbstractAttribute::getAsStr().
const std::string getAsStr(Attributor *) const override {
- std::string Str;
- raw_string_ostream OS(Str);
- OS << "load invariant pointer: " << isKnown() << '\n';
- return Str;
+ if (isKnownInvariant())
+ return "load-invariant pointer";
+ return "non-invariant pointer";
}
/// See AbstractAttribute::trackStatistics().
void trackStatistics() const override {}
+protected:
+ /// Indicate that invariance necessarily requires the pointer to be noalias.
+ virtual bool requiresNoAlias() const { return false; }
+
private:
+ bool isExternal() const {
+ const auto *F = getAssociatedFunction();
+ if (!F)
+ return true;
+ return isCallableCC(F->getCallingConv()) &&
+ getPositionKind() != IRP_CALL_SITE_RETURNED;
+ }
+
ChangeStatus checkNoAlias(Attributor &A) {
if (isKnown(IS_NOALIAS) || !isAssumed(IS_NOALIAS))
return ChangeStatus::UNCHANGED;
- const auto *F = getAssociatedFunction();
-
- if (F && isCallableCC(F->getCallingConv())) {
- // program-wide alias information cannot be inferred
- removeAssumedBits(IS_NOALIAS);
- return ChangeStatus::CHANGED;
- }
-
// try to use AANoAlias
if (const auto *ANoAlias = A.getOrCreateAAFor<AANoAlias>(
getIRPosition(), this, DepClassTy::REQUIRED)) {
@@ -12696,8 +12704,8 @@ struct AAInvariantLoadPointerImpl
return ChangeStatus::UNCHANGED;
}
- // if the function is not callable, try to infer noalias from argument
- // attribute, since it is applicable for the duration of the function
+ // try to infer noalias from argument attribute, since it is applicable for
+ // the duration of the function
if (const auto *Arg = getAssociatedArgument()) {
if (Arg->hasNoAliasAttr()) {
addKnownBits(IS_NOALIAS);
@@ -12717,34 +12725,23 @@ struct AAInvariantLoadPointerImpl
if (isKnown(IS_NOEFFECT) || !isAssumed(IS_NOEFFECT))
return ChangeStatus::UNCHANGED;
- const auto *F = getAssociatedFunction();
-
- if (!F)
- return ChangeStatus::UNCHANGED;
+ if (!getAssociatedFunction())
+ return indicatePessimisticFixpoint();
- if (isCallableCC(F->getCallingConv())) {
- // effects cannot be tracked outside of function call;
- // conservatively assume pointer has effectful uses
- removeAssumedBits(IS_NOEFFECT);
- return ChangeStatus::CHANGED;
- }
+ const auto HasNoEffectLoads = [&](const Use &U, bool &) {
+ if (const auto *LI = dyn_cast<LoadInst>(U.getUser()))
+ return !LI->mayHaveSideEffects();
- const auto HasNoSideEffects = [](const Use &U, bool &) {
- const auto *I = dyn_cast<LoadInst>(U.getUser());
- return !I || !I->mayHaveSideEffects();
+ return true;
};
- if (!A.checkForAllUses(HasNoSideEffects, *this, getAssociatedValue())) {
- removeAssumedBits(IS_NOEFFECT);
- return ChangeStatus::CHANGED;
- }
+ if (!A.checkForAllUses(HasNoEffectLoads, *this, getAssociatedValue()))
+ return indicatePessimisticFixpoint();
// try to use AAMemoryBehavior to infer readonly attribute
if (const auto *AMemoryBehavior = A.getOrCreateAAFor<AAMemoryBehavior>(
getIRPosition(), this, DepClassTy::REQUIRED)) {
- if (!AMemoryBehavior->isAssumedReadOnly()) {
- removeAssumedBits(IS_NOEFFECT);
- return ChangeStatus::CHANGED;
- }
+ if (!AMemoryBehavior->isAssumedReadOnly())
+ return indicatePessimisticFixpoint();
if (AMemoryBehavior->isKnownReadOnly()) {
addKnownBits(IS_NOEFFECT);
@@ -12762,8 +12759,7 @@ struct AAInvariantLoadPointerImpl
// readonly information is not provided, and cannot be inferred from
// AAMemoryBehavior
- removeAssumedBits(IS_NOEFFECT);
- return ChangeStatus::CHANGED;
+ return indicatePessimisticFixpoint();
}
return ChangeStatus::UNCHANGED;
@@ -12778,17 +12774,53 @@ struct AAInvariantLoadPointerFloating final : AAInvariantLoadPointerImpl {
struct AAInvariantLoadPointerReturned final : AAInvariantLoadPointerImpl {
AAInvariantLoadPointerReturned(const IRPosition &IRP, Attributor &A)
: AAInvariantLoadPointerImpl(IRP, A) {}
+
+ void initialize(Attributor &) override {
+ removeAssumedBits(IS_LOCALLY_CONSTRAINED);
+ }
};
struct AAInvariantLoadPointerCallSiteReturned final
: AAInvariantLoadPointerImpl {
AAInvariantLoadPointerCallSiteReturned(const IRPosition &IRP, Attributor &A)
: AAInvariantLoadPointerImpl(IRP, A) {}
+
+ void initialize(Attributor &A) override {
+ const auto *F = getAssociatedFunction();
+ assert(F && "no associated function for return from call");
+
+ // not much we can say about opaque functions
+ if (F->isDeclaration() || F->isIntrinsic()) {
+ if (!F->onlyReadsMemory() || !F->hasNoSync()) {
+ indicatePessimisticFixpoint();
+ return;
+ }
+ }
+ AAInvariantLoadPointerImpl::initialize(A);
+ }
+
+protected:
+ virtual bool requiresNoAlias() const override { return true; }
};
struct AAInvariantLoadPointerArgument final : AAInvariantLoadPointerImpl {
AAInvariantLoadPointerArgument(const IRPosition &IRP, Attributor &A)
: AAInvariantLoadPointerImpl(IRP, A) {}
+
+ void initialize(Attributor &) override {
+ const auto *F = getAssociatedFunction();
+ assert(F && "no associated function to argument");
+
+ if (isCallableCC(F->getCallingConv()) && !F->hasLocalLinkage())
+ removeAssumedBits(IS_LOCALLY_CONSTRAINED);
+ }
+
+protected:
+ virtual bool requiresNoAlias() const override {
+ const auto *F = getAssociatedFunction();
+ assert(F && "no associated function to argument");
+ return !isCallableCC(F->getCallingConv());
+ }
};
struct AAInvariantLoadPointerCallSiteArgument final
diff --git a/llvm/test/Transforms/Attributor/tag-invariant-loads.ll b/llvm/test/Transforms/Attributor/tag-invariant-loads.ll
index b73e6ffafbe4a..4cbf3f8edc8c6 100644
--- a/llvm/test/Transforms/Attributor/tag-invariant-loads.ll
+++ b/llvm/test/Transforms/Attributor/tag-invariant-loads.ll
@@ -4,13 +4,16 @@
@G = addrspace(1) global i32 zeroinitializer, align 4
declare void @clobber(i32) #0
declare ptr addrspace(1) @get_ptr() #0
+declare noalias ptr addrspace(1) @get_noalias_ptr() #0
+declare noalias ptr addrspace(1) @get_untouched_ptr() #1
attributes #0 = { nofree norecurse nosync nounwind willreturn }
+attributes #1 = { nofree norecurse nosync nounwind willreturn readonly }
define void @test_nonkernel(ptr addrspace(1) noalias %ptr) {
; AMDGCN-LABEL: define void @test_nonkernel(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4:[0-9]+]]
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5:[0-9]+]]
; AMDGCN-NEXT: ret void
;
%val = load i32, ptr addrspace(1) %ptr, align 4
@@ -21,9 +24,9 @@ define void @test_nonkernel(ptr addrspace(1) noalias %ptr) {
define amdgpu_kernel void @test_plain(ptr addrspace(1) %ptr) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_plain(
-; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1]] {
+; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]]
; AMDGCN-NEXT: ret void
;
%val = load i32, ptr addrspace(1) %ptr, align 4
@@ -34,9 +37,9 @@ define amdgpu_kernel void @test_plain(ptr addrspace(1) %ptr) {
define amdgpu_kernel void @test_noalias_ptr(ptr addrspace(1) noalias %ptr) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_noalias_ptr(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1]] {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0:![0-9]+]]
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]]
; AMDGCN-NEXT: ret void
;
%val = load i32, ptr addrspace(1) %ptr, align 4
@@ -46,10 +49,10 @@ define amdgpu_kernel void @test_noalias_ptr(ptr addrspace(1) noalias %ptr) {
define amdgpu_kernel void @test_swap(ptr addrspace(1) noalias %ptr, i32 inreg %swap) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_swap(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[SWAP:%.*]]) #[[ATTR1]] {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[SWAP:%.*]]) #[[ATTR2]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; AMDGCN-NEXT: store i32 [[SWAP]], ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]]
; AMDGCN-NEXT: ret void
;
%val = load i32, ptr addrspace(1) %ptr, align 4
@@ -61,9 +64,9 @@ define amdgpu_kernel void @test_swap(ptr addrspace(1) noalias %ptr, i32 inreg %s
define amdgpu_kernel void @test_volatile(ptr addrspace(1) noalias %ptr) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_volatile(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 [[PTR:%.*]]) #[[ATTR3:[0-9]+]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load volatile i32, ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]]
; AMDGCN-NEXT: ret void
;
%val = load volatile i32, ptr addrspace(1) %ptr, align 4
@@ -74,9 +77,9 @@ define amdgpu_kernel void @test_volatile(ptr addrspace(1) noalias %ptr) {
define amdgpu_kernel void @test_unordered(ptr addrspace(1) noalias %ptr) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_unordered(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1]] {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] unordered, align 4, !invariant.load [[META0]]
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]]
; AMDGCN-NEXT: ret void
;
%val = load atomic i32, ptr addrspace(1) %ptr unordered, align 4
@@ -86,9 +89,9 @@ define amdgpu_kernel void @test_unordered(ptr addrspace(1) noalias %ptr) {
define amdgpu_kernel void @test_monotonic(ptr addrspace(1) noalias %ptr) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_monotonic(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR1]] {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] monotonic, align 4
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]]
; AMDGCN-NEXT: ret void
;
%val = load atomic i32, ptr addrspace(1) %ptr monotonic, align 4
@@ -99,9 +102,9 @@ define amdgpu_kernel void @test_monotonic(ptr addrspace(1) noalias %ptr) {
define amdgpu_kernel void @test_global() {
; AMDGCN-LABEL: define amdgpu_kernel void @test_global(
-; AMDGCN-SAME: ) #[[ATTR1]] {
+; AMDGCN-SAME: ) #[[ATTR2]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) @G, align 4
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]]
; AMDGCN-NEXT: ret void
;
%val = load i32, ptr addrspace(1) @G, align 4
@@ -112,7 +115,7 @@ define amdgpu_kernel void @test_global() {
define internal i32 @test_internal_noalias_load(ptr addrspace(1) %ptr) {
; AMDGCN-LABEL: define internal i32 @test_internal_noalias_load(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR3:[0-9]+]] {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR4:[0-9]+]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
; AMDGCN-NEXT: ret i32 [[VAL]]
;
@@ -123,9 +126,9 @@ define internal i32 @test_internal_noalias_load(ptr addrspace(1) %ptr) {
define amdgpu_kernel void @test_call_internal_noalias(ptr addrspace(1) noalias %ptr) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal_noalias(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR1]] {
-; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_noalias_load(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR5:[0-9]+]]
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_noalias_load(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR6:[0-9]+]]
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]]
; AMDGCN-NEXT: ret void
;
%val = call i32 @test_internal_noalias_load(ptr addrspace(1) %ptr)
@@ -133,9 +136,9 @@ define amdgpu_kernel void @test_call_internal_noalias(ptr addrspace(1) noalias %
ret void
}
-define internal i32 @test_internal_load(ptr addrspace(1) %ptr) {
+define internal i32 @test_internal_load(ptr addrspace(1) noalias %ptr) {
; AMDGCN-LABEL: define internal i32 @test_internal_load(
-; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR3]] {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR4]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; AMDGCN-NEXT: ret i32 [[VAL]]
;
@@ -146,9 +149,9 @@ define internal i32 @test_internal_load(ptr addrspace(1) %ptr) {
define amdgpu_kernel void @test_call_internal(ptr addrspace(1) %ptr) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal(
-; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR1]] {
-; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_load(ptr addrspace(1) nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR5]]
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_load(ptr addrspace(1) nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR6]]
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]]
; AMDGCN-NEXT: ret void
;
%val = call i32 @test_internal_load(ptr addrspace(1) %ptr)
@@ -158,7 +161,7 @@ define amdgpu_kernel void @test_call_internal(ptr addrspace(1) %ptr) {
define internal i32 @test_internal_written(ptr addrspace(1) %ptr) {
; AMDGCN-LABEL: define internal i32 @test_internal_written(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR3]] {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR4]] {
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
; AMDGCN-NEXT: ret i32 [[VAL]]
;
@@ -169,24 +172,24 @@ define internal i32 @test_internal_written(ptr addrspace(1) %ptr) {
define amdgpu_kernel void @test_call_internal_written(ptr addrspace(1) noalias %ptr, i32 inreg %x) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal_written(
-; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[X:%.*]]) #[[ATTR1]] {
+; AMDGCN-SAME: ptr addrspace(1) noalias nofree captures(none) [[PTR:%.*]], i32 inreg [[X:%.*]]) #[[ATTR2]] {
+; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_written(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR6]]
; AMDGCN-NEXT: store i32 [[X]], ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_written(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR]]) #[[ATTR5]]
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]]
; AMDGCN-NEXT: ret void
;
- store i32 %x, ptr addrspace(1) %ptr
%val = call i32 @test_internal_written(ptr addrspace(1) %ptr)
+ store i32 %x, ptr addrspace(1) %ptr
call void @clobber(i32 %val)
ret void
}
define amdgpu_kernel void @test_call_ptr() {
; AMDGCN-LABEL: define amdgpu_kernel void @test_call_ptr(
-; AMDGCN-SAME: ) #[[ATTR1]] {
-; AMDGCN-NEXT: [[PTR:%.*]] = call align 4 ptr addrspace(1) @get_ptr() #[[ATTR4]]
+; AMDGCN-SAME: ) #[[ATTR2]] {
+; AMDGCN-NEXT: [[PTR:%.*]] = call align 4 ptr addrspace(1) @get_ptr() #[[ATTR5]]
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]]
; AMDGCN-NEXT: ret void
;
%ptr = call ptr addrspace(1) @get_ptr()
@@ -196,12 +199,41 @@ define amdgpu_kernel void @test_call_ptr() {
ret void
}
+define amdgpu_kernel void @test_call_noalias_ptr() {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_call_noalias_ptr(
+; AMDGCN-SAME: ) #[[ATTR2]] {
+; AMDGCN-NEXT: [[PTR:%.*]] = call align 4 ptr addrspace(1) @get_noalias_ptr() #[[ATTR5]]
+; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]]
+; AMDGCN-NEXT: ret void
+;
+ %ptr = call ptr addrspace(1) @get_noalias_ptr()
+ %val = load i32, ptr addrspace(1) %ptr, align 4
+ ;; may not be an !invariant.load since %ptr may have been written to before returning
+ call void @clobber(i32 %val)
+ ret void
+}
+
+define amdgpu_kernel void @test_call_untouched_ptr() {
+; AMDGCN-LABEL: define amdgpu_kernel void @test_call_untouched_ptr(
+; AMDGCN-SAME: ) #[[ATTR2]] {
+; AMDGCN-NEXT: [[PTR:%.*]] = call noalias align 4 ptr addrspace(1) @get_untouched_ptr() #[[ATTR7:[0-9]+]]
+; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]]
+; AMDGCN-NEXT: ret void
+;
+ %ptr = call ptr addrspace(1) @get_untouched_ptr()
+ %val = load i32, ptr addrspace(1) %ptr, align 4
+ call void @clobber(i32 %val)
+ ret void
+}
+
define amdgpu_kernel void @test_selected_load(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load(
-; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR1]] {
+; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] {
; AMDGCN-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]]
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]]
; AMDGCN-NEXT: ret void
;
%ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false
@@ -213,10 +245,10 @@ define amdgpu_kernel void @test_selected_load(i1 inreg %cond, ptr addrspace(1) n
define amdgpu_kernel void @test_selected_load_partial_noalias(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) %ptr.false) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load_partial_noalias(
-; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR1]] {
+; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] {
; AMDGCN-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]]
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]]
; AMDGCN-NEXT: ret void
;
%ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false
@@ -228,18 +260,18 @@ define amdgpu_kernel void @test_selected_load_partial_noalias(i1 inreg %cond, pt
define amdgpu_kernel void @test_branch_load(i1 %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_branch_load(
-; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR1]] {
+; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] {
; AMDGCN-NEXT: [[ENTRY:.*:]]
; AMDGCN-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; AMDGCN: [[TRUE]]:
-; AMDGCN-NEXT: call void @clobber(i32 noundef 1) #[[ATTR4]]
+; AMDGCN-NEXT: call void @clobber(i32 noundef 1) #[[ATTR5]]
; AMDGCN-NEXT: br label %[[FINISH:.*]]
; AMDGCN: [[FALSE]]:
; AMDGCN-NEXT: br label %[[FINISH]]
; AMDGCN: [[FINISH]]:
; AMDGCN-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ]
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]]
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]]
; AMDGCN-NEXT: ret void
;
entry:
@@ -259,18 +291,18 @@ finish:
define amdgpu_kernel void @test_branch_load_partial_noalias(i1 %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) %ptr.false) {
; AMDGCN-LABEL: define amdgpu_kernel void @test_branch_load_partial_noalias(
-; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR1]] {
+; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] {
; AMDGCN-NEXT: [[ENTRY:.*:]]
; AMDGCN-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; AMDGCN: [[TRUE]]:
-; AMDGCN-NEXT: call void @clobber(i32 noundef 1) #[[ATTR4]]
+; AMDGCN-NEXT: call void @clobber(i32 noundef 1) #[[ATTR5]]
; AMDGCN-NEXT: br label %[[FINISH:.*]]
; AMDGCN: [[FALSE]]:
; AMDGCN-NEXT: br label %[[FINISH]]
; AMDGCN: [[FINISH]]:
; AMDGCN-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ]
; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4
-; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR4]]
+; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR5]]
; AMDGCN-NEXT: ret void
;
entry:
>From 2c9f8a256b359299f69dc4f036e735a1c711f958 Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <zgoldtho at ualberta.ca>
Date: Tue, 3 Jun 2025 09:12:23 -0500
Subject: [PATCH 5/7] Modified checks for unrelated but affected tests
It seems the attributor cleans up more dead instructions.
---
.../Attributor/dereferenceable-1.ll | 1 -
.../Attributor/value-simplify-local-remote.ll | 22 +++++++------------
2 files changed, 8 insertions(+), 15 deletions(-)
diff --git a/llvm/test/Transforms/Attributor/dereferenceable-1.ll b/llvm/test/Transforms/Attributor/dereferenceable-1.ll
index 07e2d5ea15752..5bff2a2e6b208 100644
--- a/llvm/test/Transforms/Attributor/dereferenceable-1.ll
+++ b/llvm/test/Transforms/Attributor/dereferenceable-1.ll
@@ -207,7 +207,6 @@ define void @f7_1(ptr %ptr, i1 %cnd) {
; CHECK-LABEL: define {{[^@]+}}@f7_1
; CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[PTR:%.*]], i1 noundef [[CND:%.*]]) #[[ATTR2]] {
; CHECK-NEXT: [[A:%.*]] = tail call i32 @unkown_f(ptr noundef nonnull align 4 dereferenceable(4) [[PTR]]) #[[ATTR1]]
-; CHECK-NEXT: [[PTR_0:%.*]] = load i32, ptr [[PTR]], align 4
; CHECK-NEXT: [[B:%.*]] = tail call i32 @unkown_f(ptr noundef nonnull align 4 dereferenceable(4) [[PTR]]) #[[ATTR1]]
; CHECK-NEXT: br i1 [[CND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
; CHECK: if.true:
diff --git a/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll b/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll
index 374d5ba7ff52b..4767244800d21 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll
@@ -135,7 +135,7 @@ define internal %S @foo.1(ptr %foo.this) {
; TUNIT-NEXT: [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8
; TUNIT-NEXT: store ptr [[FOO_THIS]], ptr [[FOO_THIS]], align 8
; TUNIT-NEXT: call void @bar.2(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[FOO_THIS]]) #[[ATTR5:[0-9]+]]
-; TUNIT-NEXT: [[FOO_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8
+; TUNIT-NEXT: [[FOO_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8:![0-9]+]]
; TUNIT-NEXT: ret [[S]] [[FOO_RET]]
;
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite)
@@ -145,7 +145,7 @@ define internal %S @foo.1(ptr %foo.this) {
; CGSCC-NEXT: [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8
; CGSCC-NEXT: store ptr [[FOO_THIS]], ptr [[FOO_THIS]], align 8
; CGSCC-NEXT: call void @bar.2(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[FOO_THIS]]) #[[ATTR6]]
-; CGSCC-NEXT: [[FOO_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8
+; CGSCC-NEXT: [[FOO_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8:![0-9]+]]
; CGSCC-NEXT: ret [[S]] [[FOO_RET]]
;
entry:
@@ -234,7 +234,7 @@ define internal %S @bar.5(ptr %this) {
; TUNIT-NEXT: [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8
; TUNIT-NEXT: store ptr [[THIS]], ptr [[THIS]], align 8
; TUNIT-NEXT: call void @baz.6(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) [[RETVAL]], ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS]]) #[[ATTR4]]
-; TUNIT-NEXT: [[BAR_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8
+; TUNIT-NEXT: [[BAR_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8]]
; TUNIT-NEXT: ret [[S]] [[BAR_RET]]
;
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite)
@@ -244,7 +244,7 @@ define internal %S @bar.5(ptr %this) {
; CGSCC-NEXT: [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8
; CGSCC-NEXT: store ptr [[THIS]], ptr [[THIS]], align 8
; CGSCC-NEXT: call void @baz.6(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS]]) #[[ATTR9:[0-9]+]]
-; CGSCC-NEXT: [[BAR_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8
+; CGSCC-NEXT: [[BAR_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8]]
; CGSCC-NEXT: ret [[S]] [[BAR_RET]]
;
entry:
@@ -286,7 +286,7 @@ define internal void @boom(ptr %this, ptr %data) {
; TUNIT-NEXT: entry:
; TUNIT-NEXT: [[DATA_ADDR:%.*]] = alloca ptr, i32 0, align 8
; TUNIT-NEXT: store ptr [[DATA]], ptr [[DATA_ADDR]], align 8
-; TUNIT-NEXT: [[V:%.*]] = load ptr, ptr [[DATA_ADDR]], align 8
+; TUNIT-NEXT: [[V:%.*]] = load ptr, ptr [[DATA_ADDR]], align 8, !invariant.load [[META8]]
; TUNIT-NEXT: store ptr [[V]], ptr [[THIS]], align 8
; TUNIT-NEXT: ret void
;
@@ -342,14 +342,6 @@ define %S.2 @t3.helper() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RETVAL:%.*]] = alloca [[S_2:%.*]], align 8
; CHECK-NEXT: call void @ext1(ptr noundef nonnull align 8 dereferenceable(24) [[RETVAL]])
-; CHECK-NEXT: [[DOTFCA_0_LOAD:%.*]] = load ptr, ptr [[RETVAL]], align 8
-; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[S_2]] poison, ptr [[DOTFCA_0_LOAD]], 0
-; CHECK-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [[S_2]], ptr [[RETVAL]], i32 0, i32 1
-; CHECK-NEXT: [[DOTFCA_1_LOAD:%.*]] = load i64, ptr [[DOTFCA_1_GEP]], align 8
-; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [[S_2]] [[DOTFCA_0_INSERT]], i64 [[DOTFCA_1_LOAD]], 1
-; CHECK-NEXT: [[DOTFCA_2_GEP:%.*]] = getelementptr inbounds [[S_2]], ptr [[RETVAL]], i32 0, i32 2
-; CHECK-NEXT: [[DOTFCA_2_LOAD:%.*]] = load i64, ptr [[DOTFCA_2_GEP]], align 8
-; CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [[S_2]] [[DOTFCA_1_INSERT]], i64 [[DOTFCA_2_LOAD]], 2
; CHECK-NEXT: ret [[S_2]] zeroinitializer
;
entry:
@@ -508,7 +500,7 @@ define internal %S @t4a(ptr %this) {
; CGSCC-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, i32 0, align 8
; CGSCC-NEXT: store ptr [[THIS]], ptr [[THIS]], align 8
; CGSCC-NEXT: call void @t4b(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS]]) #[[ATTR6]]
-; CGSCC-NEXT: [[TMP0:%.*]] = load [[S]], ptr [[RETVAL]], align 8
+; CGSCC-NEXT: [[TMP0:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8]]
; CGSCC-NEXT: ret [[S]] [[TMP0]]
;
entry:
@@ -623,6 +615,7 @@ entry:
; TUNIT: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; TUNIT: [[META6:![0-9]+]] = !{i32 7, !"Dwarf Version", i32 2}
; TUNIT: [[META7:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3}
+; TUNIT: [[META8]] = !{}
;.
; CGSCC: [[META0:![0-9]+]] = !{i32 2, !"SDK Version", [2 x i32] [i32 11, i32 5]}
; CGSCC: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
@@ -632,4 +625,5 @@ entry:
; CGSCC: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; CGSCC: [[META6:![0-9]+]] = !{i32 7, !"Dwarf Version", i32 2}
; CGSCC: [[META7:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3}
+; CGSCC: [[META8]] = !{}
;.
>From fcbc5a27d02fe5f4ab671abaf0eada1194991815 Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <zgoldtho at ualberta.ca>
Date: Tue, 3 Jun 2025 11:01:30 -0500
Subject: [PATCH 6/7] Incorporated feedback.
---
.../Transforms/IPO/AttributorAttributes.cpp | 112 ++++++++++--------
1 file changed, 64 insertions(+), 48 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index cfe7611276feb..66436262bf1f7 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12569,6 +12569,7 @@ struct AAInvariantLoadPointerImpl
bool isKnownInvariant() const final {
return isKnownLocallyInvariant() && isKnown(IS_LOCALLY_CONSTRAINED);
}
+
bool isKnownLocallyInvariant() const final {
if (isKnown(IS_LOCALLY_INVARIANT))
return true;
@@ -12578,6 +12579,7 @@ struct AAInvariantLoadPointerImpl
bool isAssumedInvariant() const final {
return isAssumedLocallyInvariant() && isAssumed(IS_LOCALLY_CONSTRAINED);
}
+
bool isAssumedLocallyInvariant() const final {
if (isAssumed(IS_LOCALLY_INVARIANT))
return true;
@@ -12585,44 +12587,15 @@ struct AAInvariantLoadPointerImpl
}
ChangeStatus updateImpl(Attributor &A) override {
- if (isKnownInvariant())
- return indicateOptimisticFixpoint();
-
ChangeStatus Changed = ChangeStatus::UNCHANGED;
- Changed |= checkNoAlias(A);
+ Changed |= updateNoAlias(A);
if (requiresNoAlias() && !isAssumed(IS_NOALIAS))
return indicatePessimisticFixpoint();
- Changed |= checkNoEffect(A);
-
- // try to infer invariance from underlying objects
- const auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(
- getIRPosition(), this, DepClassTy::REQUIRED);
-
- bool UsedAssumedInformation = false;
- const auto IsInvariantLoadIfPointer = [&](const Value &V) {
- if (!V.getType()->isPointerTy())
- return true;
- const auto *IsInvariantLoadPointer =
- A.getOrCreateAAFor<AAInvariantLoadPointer>(IRPosition::value(V), this,
- DepClassTy::REQUIRED);
- if (IsInvariantLoadPointer->isKnownLocallyInvariant())
- return true;
- if (!IsInvariantLoadPointer->isAssumedLocallyInvariant())
- return false;
-
- UsedAssumedInformation = true;
- return true;
- };
- if (!AUO->forallUnderlyingObjects(IsInvariantLoadIfPointer))
- return indicatePessimisticFixpoint();
+ Changed |= updateNoEffect(A);
- if (!UsedAssumedInformation) {
- // pointer is known (not assumed) to be locally invariant
- addKnownBits(IS_LOCALLY_INVARIANT);
- return Changed;
- }
+ Changed |= updateLocalInvariance(A);
return Changed;
}
@@ -12632,7 +12605,7 @@ struct AAInvariantLoadPointerImpl
return ChangeStatus::UNCHANGED;
ChangeStatus Changed = ChangeStatus::UNCHANGED;
- Value *Ptr = &getAssociatedValue();
+ const Value *Ptr = &getAssociatedValue();
const auto TagInvariantLoads = [&](const Use &U, bool &) {
if (U.get() != Ptr)
return true;
@@ -12649,7 +12622,6 @@ struct AAInvariantLoadPointerImpl
return true;
if (auto *LI = dyn_cast<LoadInst>(I)) {
-
LI->setMetadata(LLVMContext::MD_invariant_load,
MDNode::get(LI->getContext(), {}));
Changed = ChangeStatus::CHANGED;
@@ -12677,14 +12649,14 @@ struct AAInvariantLoadPointerImpl
private:
bool isExternal() const {
- const auto *F = getAssociatedFunction();
+ const Function *F = getAssociatedFunction();
if (!F)
return true;
return isCallableCC(F->getCallingConv()) &&
getPositionKind() != IRP_CALL_SITE_RETURNED;
}
- ChangeStatus checkNoAlias(Attributor &A) {
+ ChangeStatus updateNoAlias(Attributor &A) {
if (isKnown(IS_NOALIAS) || !isAssumed(IS_NOALIAS))
return ChangeStatus::UNCHANGED;
@@ -12693,7 +12665,7 @@ struct AAInvariantLoadPointerImpl
getIRPosition(), this, DepClassTy::REQUIRED)) {
if (ANoAlias->isKnownNoAlias()) {
addKnownBits(IS_NOALIAS);
- return ChangeStatus::UNCHANGED;
+ return ChangeStatus::CHANGED;
}
if (!ANoAlias->isAssumedNoAlias()) {
@@ -12706,7 +12678,7 @@ struct AAInvariantLoadPointerImpl
// try to infer noalias from argument attribute, since it is applicable for
// the duration of the function
- if (const auto *Arg = getAssociatedArgument()) {
+ if (const Argument *Arg = getAssociatedArgument()) {
if (Arg->hasNoAliasAttr()) {
addKnownBits(IS_NOALIAS);
return ChangeStatus::UNCHANGED;
@@ -12721,7 +12693,7 @@ struct AAInvariantLoadPointerImpl
return ChangeStatus::UNCHANGED;
}
- ChangeStatus checkNoEffect(Attributor &A) {
+ ChangeStatus updateNoEffect(Attributor &A) {
if (isKnown(IS_NOEFFECT) || !isAssumed(IS_NOEFFECT))
return ChangeStatus::UNCHANGED;
@@ -12729,10 +12701,8 @@ struct AAInvariantLoadPointerImpl
return indicatePessimisticFixpoint();
const auto HasNoEffectLoads = [&](const Use &U, bool &) {
- if (const auto *LI = dyn_cast<LoadInst>(U.getUser()))
- return !LI->mayHaveSideEffects();
-
- return true;
+ const auto *LI = dyn_cast<LoadInst>(U.getUser());
+ return !LI || !LI->mayHaveSideEffects();
};
if (!A.checkForAllUses(HasNoEffectLoads, *this, getAssociatedValue()))
return indicatePessimisticFixpoint();
@@ -12751,7 +12721,7 @@ struct AAInvariantLoadPointerImpl
return ChangeStatus::UNCHANGED;
}
- if (const auto *Arg = getAssociatedArgument()) {
+ if (const Argument *Arg = getAssociatedArgument()) {
if (Arg->onlyReadsMemory()) {
addKnownBits(IS_NOEFFECT);
return ChangeStatus::UNCHANGED;
@@ -12764,6 +12734,47 @@ struct AAInvariantLoadPointerImpl
return ChangeStatus::UNCHANGED;
}
+
+ ChangeStatus updateLocalInvariance(Attributor &A) {
+ if (isKnown(IS_LOCALLY_INVARIANT) || !isAssumed(IS_LOCALLY_INVARIANT))
+ return ChangeStatus::UNCHANGED;
+
+ // try to infer invariance from underlying objects
+ const auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(
+ getIRPosition(), this, DepClassTy::REQUIRED);
+ if (!AUO)
+ return ChangeStatus::UNCHANGED;
+
+ bool UsedAssumedInformation = false;
+ const auto IsLocallyInvariantLoadIfPointer = [&](const Value &V) {
+ if (!V.getType()->isPointerTy())
+ return true;
+ const auto *IsInvariantLoadPointer =
+ A.getOrCreateAAFor<AAInvariantLoadPointer>(IRPosition::value(V), this,
+ DepClassTy::REQUIRED);
+ // conservatively fail if invariance cannot be inferred
+ if (!IsInvariantLoadPointer)
+ return false;
+
+ if (IsInvariantLoadPointer->isKnownLocallyInvariant())
+ return true;
+ if (!IsInvariantLoadPointer->isAssumedLocallyInvariant())
+ return false;
+
+ UsedAssumedInformation = true;
+ return true;
+ };
+ if (!AUO->forallUnderlyingObjects(IsLocallyInvariantLoadIfPointer))
+ return indicatePessimisticFixpoint();
+
+ if (!UsedAssumedInformation) {
+ // pointer is known (not assumed) to be locally invariant
+ addKnownBits(IS_LOCALLY_INVARIANT);
+ return ChangeStatus::CHANGED;
+ }
+
+ return ChangeStatus::UNCHANGED;
+ }
};
struct AAInvariantLoadPointerFloating final : AAInvariantLoadPointerImpl {
@@ -12786,7 +12797,7 @@ struct AAInvariantLoadPointerCallSiteReturned final
: AAInvariantLoadPointerImpl(IRP, A) {}
void initialize(Attributor &A) override {
- const auto *F = getAssociatedFunction();
+ const Function *F = getAssociatedFunction();
assert(F && "no associated function for return from call");
// not much we can say about opaque functions
@@ -12808,16 +12819,21 @@ struct AAInvariantLoadPointerArgument final : AAInvariantLoadPointerImpl {
: AAInvariantLoadPointerImpl(IRP, A) {}
void initialize(Attributor &) override {
- const auto *F = getAssociatedFunction();
+ const Function *F = getAssociatedFunction();
assert(F && "no associated function to argument");
- if (isCallableCC(F->getCallingConv()) && !F->hasLocalLinkage())
+ if (!isCallableCC(F->getCallingConv())) {
+ addKnownBits(IS_LOCALLY_CONSTRAINED);
+ return;
+ }
+
+ if (!F->hasLocalLinkage())
removeAssumedBits(IS_LOCALLY_CONSTRAINED);
}
protected:
virtual bool requiresNoAlias() const override {
- const auto *F = getAssociatedFunction();
+ const Function *F = getAssociatedFunction();
assert(F && "no associated function to argument");
return !isCallableCC(F->getCallingConv());
}
>From 692876e8aa07ef453c6f94d268f2df59f2b5c5b7 Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <zgoldtho at ualberta.ca>
Date: Tue, 3 Jun 2025 16:51:49 -0500
Subject: [PATCH 7/7] Incorporated feedback.
---
.../Transforms/IPO/AttributorAttributes.cpp | 33 ++++++++++---------
.../{ => AMDGPU}/tag-invariant-loads.ll | 5 +--
2 files changed, 20 insertions(+), 18 deletions(-)
rename llvm/test/Transforms/Attributor/{ => AMDGPU}/tag-invariant-loads.ll (99%)
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 66436262bf1f7..0c267bfa455fa 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12643,11 +12643,22 @@ struct AAInvariantLoadPointerImpl
/// See AbstractAttribute::trackStatistics().
void trackStatistics() const override {}
-protected:
- /// Indicate that invariance necessarily requires the pointer to be noalias.
- virtual bool requiresNoAlias() const { return false; }
-
private:
+ /// Indicate that noalias is required for the pointer to be invariant.
+ bool requiresNoAlias() const {
+ switch (getPositionKind()) {
+ default:
+ return false;
+ case IRP_CALL_SITE_RETURNED:
+ return true;
+ case IRP_ARGUMENT: {
+ const Function *F = getAssociatedFunction();
+ assert(F && "no associated function for argument");
+ return !isCallableCC(F->getCallingConv());
+ }
+ }
+ }
+
bool isExternal() const {
const Function *F = getAssociatedFunction();
if (!F)
@@ -12800,7 +12811,7 @@ struct AAInvariantLoadPointerCallSiteReturned final
const Function *F = getAssociatedFunction();
assert(F && "no associated function for return from call");
- // not much we can say about opaque functions
+ // There is not much we can say about opaque functions.
if (F->isDeclaration() || F->isIntrinsic()) {
if (!F->onlyReadsMemory() || !F->hasNoSync()) {
indicatePessimisticFixpoint();
@@ -12809,9 +12820,6 @@ struct AAInvariantLoadPointerCallSiteReturned final
}
AAInvariantLoadPointerImpl::initialize(A);
}
-
-protected:
- virtual bool requiresNoAlias() const override { return true; }
};
struct AAInvariantLoadPointerArgument final : AAInvariantLoadPointerImpl {
@@ -12820,7 +12828,7 @@ struct AAInvariantLoadPointerArgument final : AAInvariantLoadPointerImpl {
void initialize(Attributor &) override {
const Function *F = getAssociatedFunction();
- assert(F && "no associated function to argument");
+ assert(F && "no associated function for argument");
if (!isCallableCC(F->getCallingConv())) {
addKnownBits(IS_LOCALLY_CONSTRAINED);
@@ -12830,13 +12838,6 @@ struct AAInvariantLoadPointerArgument final : AAInvariantLoadPointerImpl {
if (!F->hasLocalLinkage())
removeAssumedBits(IS_LOCALLY_CONSTRAINED);
}
-
-protected:
- virtual bool requiresNoAlias() const override {
- const Function *F = getAssociatedFunction();
- assert(F && "no associated function to argument");
- return !isCallableCC(F->getCallingConv());
- }
};
struct AAInvariantLoadPointerCallSiteArgument final
diff --git a/llvm/test/Transforms/Attributor/tag-invariant-loads.ll b/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll
similarity index 99%
rename from llvm/test/Transforms/Attributor/tag-invariant-loads.ll
rename to llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll
index 4cbf3f8edc8c6..3cf6759a28b53 100644
--- a/llvm/test/Transforms/Attributor/tag-invariant-loads.ll
+++ b/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll
@@ -6,8 +6,6 @@ declare void @clobber(i32) #0
declare ptr addrspace(1) @get_ptr() #0
declare noalias ptr addrspace(1) @get_noalias_ptr() #0
declare noalias ptr addrspace(1) @get_untouched_ptr() #1
-attributes #0 = { nofree norecurse nosync nounwind willreturn }
-attributes #1 = { nofree norecurse nosync nounwind willreturn readonly }
define void @test_nonkernel(ptr addrspace(1) noalias %ptr) {
; AMDGCN-LABEL: define void @test_nonkernel(
@@ -319,6 +317,9 @@ finish:
call void @clobber(i32 %val)
ret void
}
+
+attributes #0 = { nofree norecurse nosync nounwind willreturn }
+attributes #1 = { nofree norecurse nosync nounwind willreturn readonly }
;.
; AMDGCN: [[META0]] = !{}
;.
More information about the llvm-commits
mailing list