[llvm] 181b014 - Attributor: Infer noalias.addrspace metadata for memory instructions (#136553)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 7 18:50:34 PDT 2025
Author: Shoreshen
Date: 2025-07-08T09:50:31+08:00
New Revision: 181b014c06d9130455f95dcae44d284e215e0efc
URL: https://github.com/llvm/llvm-project/commit/181b014c06d9130455f95dcae44d284e215e0efc
DIFF: https://github.com/llvm/llvm-project/commit/181b014c06d9130455f95dcae44d284e215e0efc.diff
LOG: Attributor: Infer noalias.addrspace metadata for memory instructions (#136553)
Add noalias.addrspace metadata to store, load, and atomic instructions in the
AMDGPU backend.
Added:
llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll
Modified:
llvm/include/llvm/Transforms/IPO/Attributor.h
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
llvm/lib/Transforms/IPO/AttributorAttributes.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index f19f3292c4798..97876877ea67b 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -99,6 +99,7 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
@@ -1355,6 +1356,8 @@ struct InformationCache {
/// Return the flat address space if the associated target has.
LLVM_ABI std::optional<unsigned> getFlatAddressSpace() const;
+ /// Return the largest address space number used by the associated target.
+ virtual unsigned getMaxAddrSpace() const { return ~0U; }
+
private:
struct FunctionInfo {
LLVM_ABI ~FunctionInfo();
@@ -6420,6 +6423,47 @@ struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> {
static const uint32_t InvalidAddressSpace = ~0U;
};
+/// An abstract interface for tracking which address spaces a pointer value
+/// cannot point into (no-alias address space information).
+struct AANoAliasAddrSpace
+ : public StateWrapper<BooleanState, AbstractAttribute> {
+ using Base = StateWrapper<BooleanState, AbstractAttribute>;
+ using RangeMap = IntervalMap<unsigned, bool>;
+ AANoAliasAddrSpace(const IRPosition &IRP, Attributor &A)
+ : Base(IRP), Map(Allocator) {}
+
+ /// See AbstractAttribute::isValidIRPositionForInit
+ static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
+ if (!IRP.getAssociatedType()->isPtrOrPtrVectorTy())
+ return false;
+ return AbstractAttribute::isValidIRPositionForInit(A, IRP);
+ }
+
+ /// See AbstractAttribute::requiresCallersForArgOrFunction
+ static bool requiresCallersForArgOrFunction() { return true; }
+
+ /// Create an abstract attribute view for the position \p IRP.
+ LLVM_ABI static AANoAliasAddrSpace &createForPosition(const IRPosition &IRP,
+ Attributor &A);
+ /// See AbstractAttribute::getName()
+ StringRef getName() const override { return "AANoAliasAddrSpace"; }
+
+ /// See AbstractAttribute::getIdAddr()
+ const char *getIdAddr() const override { return &ID; }
+
+ /// This function should return true if the type of the \p AA is
+ /// AANoAliasAddrSpace.
+ static bool classof(const AbstractAttribute *AA) {
+ return (AA->getIdAddr() == &ID);
+ }
+
+ /// Unique ID (due to the unique address)
+ LLVM_ABI static const char ID;
+
+protected:
+ RangeMap::Allocator Allocator;
+ RangeMap Map;
+};
+
struct AAAllocationInfo : public StateWrapper<BooleanState, AbstractAttribute> {
AAAllocationInfo(const IRPosition &IRP, Attributor &A)
: StateWrapper<BooleanState, AbstractAttribute>(IRP) {}
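
The new AANoAliasAddrSpace state boils down to an IntervalMap<unsigned, bool>
that starts out covering every address space and has individual address spaces
carved out as they are observed. Below is a minimal standalone sketch of that
bookkeeping, assuming a hypothetical target whose address spaces run from 0 to
8; it is illustration only, not the Attributor code itself.

#include "llvm/ADT/IntervalMap.h"
#include "llvm/Support/raw_ostream.h"

using RangeMap = llvm::IntervalMap<unsigned, bool>;

// Erase address space AS from the interval that contains it, re-inserting the
// two remaining halves of the interval (if any).
static void removeAS(RangeMap &Map, unsigned AS) {
  RangeMap::iterator I = Map.find(AS);
  if (I == Map.end() || I.start() > AS)
    return;
  unsigned Lower = I.start(), Upper = I.stop();
  I.erase();
  if (AS > Lower)
    Map.insert(Lower, AS - 1, true);
  if (AS < Upper)
    Map.insert(AS + 1, Upper, true);
}

int main() {
  RangeMap::Allocator Alloc;
  RangeMap Map(Alloc);
  Map.insert(0, 8, true); // hypothetical target with address spaces 0..8
  removeAS(Map, 0);       // drop the flat address space
  removeAS(Map, 1);       // drop an address space that was actually observed
  for (RangeMap::const_iterator I = Map.begin(); I != Map.end(); ++I)
    llvm::outs() << '[' << I.start() << ',' << I.stop() + 1 << ") ";
  llvm::outs() << '\n';   // prints "[2,9)"
  return 0;
}
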
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index fef22c81c9391..79cf49f88d6dc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -235,6 +235,10 @@ class AMDGPUInformationCache : public InformationCache {
return ST.getMaxWavesPerEU();
}
+ unsigned getMaxAddrSpace() const override {
+ return AMDGPUAS::MAX_AMDGPU_ADDRESS;
+ }
+
private:
/// Check if the ConstantExpr \p CE uses an addrspacecast from private or
/// local to flat. These casts may require the queue pointer.
@@ -1380,8 +1384,8 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
&AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
&AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
&AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
- &AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
- &AAInstanceInfo::ID});
+ &AAUnderlyingObjects::ID, &AANoAliasAddrSpace::ID, &AAAddressSpace::ID,
+ &AAIndirectCallInfo::ID, &AAInstanceInfo::ID});
AttributorConfig AC(CGUpdater);
AC.IsClosedWorldModule = Options.IsClosedWorld;
@@ -1420,18 +1424,19 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
}
for (auto &I : instructions(F)) {
- if (auto *LI = dyn_cast<LoadInst>(&I)) {
- A.getOrCreateAAFor<AAAddressSpace>(
- IRPosition::value(*LI->getPointerOperand()));
- } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
- A.getOrCreateAAFor<AAAddressSpace>(
- IRPosition::value(*SI->getPointerOperand()));
- } else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I)) {
- A.getOrCreateAAFor<AAAddressSpace>(
- IRPosition::value(*RMW->getPointerOperand()));
- } else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I)) {
- A.getOrCreateAAFor<AAAddressSpace>(
- IRPosition::value(*CmpX->getPointerOperand()));
+ Value *Ptr = nullptr;
+ if (auto *LI = dyn_cast<LoadInst>(&I))
+ Ptr = LI->getPointerOperand();
+ else if (auto *SI = dyn_cast<StoreInst>(&I))
+ Ptr = SI->getPointerOperand();
+ else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
+ Ptr = RMW->getPointerOperand();
+ else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
+ Ptr = CmpX->getPointerOperand();
+
+ if (Ptr) {
+ A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
+ A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(*Ptr));
}
}
}
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 79a7a2a95d1d2..ed2ac4dbfeecd 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -193,6 +193,7 @@ PIPE_OPERATOR(AAAssumptionInfo)
PIPE_OPERATOR(AAUnderlyingObjects)
PIPE_OPERATOR(AAInvariantLoadPointer)
PIPE_OPERATOR(AAAddressSpace)
+PIPE_OPERATOR(AANoAliasAddrSpace)
PIPE_OPERATOR(AAAllocationInfo)
PIPE_OPERATOR(AAIndirectCallInfo)
PIPE_OPERATOR(AAGlobalValueInfo)
@@ -13146,6 +13147,197 @@ struct AAAddressSpaceCallSiteArgument final : AAAddressSpaceImpl {
};
} // namespace
+/// ------------------------ No Alias Address Space ---------------------------
+// This attribute assumes the flat address space can alias all other address
+// spaces.
+
+// TODO: This is similar to AAAddressSpace and most of the code should be
+// merged, but merging them produced gateway test failures that could not be
+// reproduced locally; handle merging AANoAliasAddrSpace and AAAddressSpace in
+// a separate PR.
+
+namespace {
+struct AANoAliasAddrSpaceImpl : public AANoAliasAddrSpace {
+ AANoAliasAddrSpaceImpl(const IRPosition &IRP, Attributor &A)
+ : AANoAliasAddrSpace(IRP, A) {}
+
+ void initialize(Attributor &A) override {
+ assert(getAssociatedType()->isPtrOrPtrVectorTy() &&
+ "Associated value is not a pointer");
+
+ resetASRanges(A);
+
+ std::optional<unsigned> FlatAS = A.getInfoCache().getFlatAddressSpace();
+ if (!FlatAS.has_value()) {
+ indicatePessimisticFixpoint();
+ return;
+ }
+
+ removeAS(*FlatAS);
+
+ unsigned AS = getAssociatedType()->getPointerAddressSpace();
+ if (AS != *FlatAS) {
+ removeAS(AS);
+ indicateOptimisticFixpoint();
+ }
+ }
+
+ ChangeStatus updateImpl(Attributor &A) override {
+ unsigned FlatAS = A.getInfoCache().getFlatAddressSpace().value();
+ uint32_t OldAssumed = getAssumed();
+
+ auto CheckAddressSpace = [&](Value &Obj) {
+ if (isa<PoisonValue>(&Obj))
+ return true;
+
+ unsigned AS = Obj.getType()->getPointerAddressSpace();
+ if (AS == FlatAS)
+ return false;
+
+ removeAS(AS);
+ return true;
+ };
+
+ const AAUnderlyingObjects *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(
+ getIRPosition(), this, DepClassTy::REQUIRED);
+ if (!AUO->forallUnderlyingObjects(CheckAddressSpace))
+ return indicatePessimisticFixpoint();
+
+ return OldAssumed == getAssumed() ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ unsigned FlatAS = A.getInfoCache().getFlatAddressSpace().value();
+
+ unsigned AS = getAssociatedType()->getPointerAddressSpace();
+ if (AS != FlatAS || Map.empty())
+ return ChangeStatus::UNCHANGED;
+
+ LLVMContext &Ctx = getAssociatedValue().getContext();
+ MDNode *NoAliasASNode = nullptr;
+ MDBuilder MDB(Ctx);
+ // Use an explicit iterator so each range's start/stop bounds are available.
+ for (RangeMap::const_iterator I = Map.begin(); I != Map.end(); I++) {
+ if (!I.value())
+ continue;
+ unsigned Upper = I.stop();
+ unsigned Lower = I.start();
+ if (!NoAliasASNode) {
+ NoAliasASNode = MDB.createRange(APInt(32, Lower), APInt(32, Upper + 1));
+ continue;
+ }
+ MDNode *ASRange = MDB.createRange(APInt(32, Lower), APInt(32, Upper + 1));
+ NoAliasASNode = MDNode::getMostGenericRange(NoAliasASNode, ASRange);
+ }
+
+ Value *AssociatedValue = &getAssociatedValue();
+ bool Changed = false;
+
+ auto AddNoAliasAttr = [&](const Use &U, bool &) {
+ if (U.get() != AssociatedValue)
+ return true;
+ Instruction *Inst = dyn_cast<Instruction>(U.getUser());
+ if (!Inst || Inst->hasMetadata(LLVMContext::MD_noalias_addrspace))
+ return true;
+ if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst) &&
+ !isa<AtomicCmpXchgInst>(Inst) && !isa<AtomicRMWInst>(Inst))
+ return true;
+ if (!A.isRunOn(Inst->getFunction()))
+ return true;
+ Inst->setMetadata(LLVMContext::MD_noalias_addrspace, NoAliasASNode);
+ Changed = true;
+ return true;
+ };
+ (void)A.checkForAllUses(AddNoAliasAttr, *this, *AssociatedValue,
+ /*CheckBBLivenessOnly=*/true);
+ return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr(Attributor *A) const override {
+ if (!isValidState())
+ return "<invalid>";
+ std::string Str;
+ raw_string_ostream OS(Str);
+ OS << "CanNotBeAddrSpace(";
+ for (RangeMap::const_iterator I = Map.begin(); I != Map.end(); I++) {
+ unsigned Upper = I.stop();
+ unsigned Lower = I.start();
+ OS << ' ' << '[' << Lower << ',' << Upper + 1 << ')';
+ }
+ OS << " )";
+ return OS.str();
+ }
+
+private:
+ void removeAS(unsigned AS) {
+ RangeMap::iterator I = Map.find(AS);
+
+ if (I != Map.end()) {
+ unsigned Upper = I.stop();
+ unsigned Lower = I.start();
+ I.erase();
+ if (Upper == Lower)
+ return;
+ if (AS != ~((unsigned)0) && AS + 1 <= Upper)
+ Map.insert(AS + 1, Upper, true);
+ if (AS != 0 && Lower <= AS - 1)
+ Map.insert(Lower, AS - 1, true);
+ }
+ }
+
+ void resetASRanges(Attributor &A) {
+ Map.clear();
+ Map.insert(0, A.getInfoCache().getMaxAddrSpace(), true);
+ }
+};
+
+struct AANoAliasAddrSpaceFloating final : AANoAliasAddrSpaceImpl {
+ AANoAliasAddrSpaceFloating(const IRPosition &IRP, Attributor &A)
+ : AANoAliasAddrSpaceImpl(IRP, A) {}
+
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FLOATING_ATTR(noaliasaddrspace);
+ }
+};
+
+struct AANoAliasAddrSpaceReturned final : AANoAliasAddrSpaceImpl {
+ AANoAliasAddrSpaceReturned(const IRPosition &IRP, Attributor &A)
+ : AANoAliasAddrSpaceImpl(IRP, A) {}
+
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FNRET_ATTR(noaliasaddrspace);
+ }
+};
+
+struct AANoAliasAddrSpaceCallSiteReturned final : AANoAliasAddrSpaceImpl {
+ AANoAliasAddrSpaceCallSiteReturned(const IRPosition &IRP, Attributor &A)
+ : AANoAliasAddrSpaceImpl(IRP, A) {}
+
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSRET_ATTR(noaliasaddrspace);
+ }
+};
+
+struct AANoAliasAddrSpaceArgument final : AANoAliasAddrSpaceImpl {
+ AANoAliasAddrSpaceArgument(const IRPosition &IRP, Attributor &A)
+ : AANoAliasAddrSpaceImpl(IRP, A) {}
+
+ void trackStatistics() const override {
+ STATS_DECLTRACK_ARG_ATTR(noaliasaddrspace);
+ }
+};
+
+struct AANoAliasAddrSpaceCallSiteArgument final : AANoAliasAddrSpaceImpl {
+ AANoAliasAddrSpaceCallSiteArgument(const IRPosition &IRP, Attributor &A)
+ : AANoAliasAddrSpaceImpl(IRP, A) {}
+
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSARG_ATTR(noaliasaddrspace);
+ }
+};
+} // namespace
/// ----------- Allocation Info ----------
namespace {
struct AAAllocationInfoImpl : public AAAllocationInfo {
@@ -13400,6 +13592,7 @@ const char AAAssumptionInfo::ID = 0;
const char AAUnderlyingObjects::ID = 0;
const char AAInvariantLoadPointer::ID = 0;
const char AAAddressSpace::ID = 0;
+const char AANoAliasAddrSpace::ID = 0;
const char AAAllocationInfo::ID = 0;
const char AAIndirectCallInfo::ID = 0;
const char AAGlobalValueInfo::ID = 0;
@@ -13535,6 +13728,7 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFPClass)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPointerInfo)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAInvariantLoadPointer)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAddressSpace)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAliasAddrSpace)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAllocationInfo)
CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueSimplify)
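
For reference, the intervals left in the map are what manifest() folds into a
single !noalias.addrspace node via MDBuilder::createRange and
MDNode::getMostGenericRange. Below is a minimal standalone sketch of that
folding with a made-up interval list; it is illustration only, not the
manifest code above.

#include "llvm/ADT/APInt.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include <utility>
#include <vector>

// Fold a list of inclusive [Lower, Upper] address-space intervals into one
// metadata node of half-open ranges, as used by !noalias.addrspace.
static llvm::MDNode *buildNoAliasASNode(
    llvm::LLVMContext &Ctx,
    const std::vector<std::pair<unsigned, unsigned>> &Intervals) {
  llvm::MDBuilder MDB(Ctx);
  llvm::MDNode *Node = nullptr;
  for (auto [Lower, Upper] : Intervals) {
    llvm::MDNode *Range =
        MDB.createRange(llvm::APInt(32, Lower), llvm::APInt(32, Upper + 1));
    Node = Node ? llvm::MDNode::getMostGenericRange(Node, Range) : Range;
  }
  return Node;
}

// Usage on a memory instruction I, with hypothetical intervals {[2,2], [4,9]}:
//   I->setMetadata(LLVMContext::MD_noalias_addrspace,
//                  buildNoAliasASNode(Ctx, {{2, 2}, {4, 9}}));
// yields !noalias.addrspace !{i32 2, i32 3, i32 4, i32 10}, i.e. the pointer
// is known not to be in address space 2 or in address spaces 4 through 9.
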
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll b/llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll
new file mode 100644
index 0000000000000..7ce5a00c0bf36
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll
@@ -0,0 +1,639 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck %s
+
+ at gptr = addrspace(1) externally_initialized global i32 0, align 4
+ at gptr2 = addrspace(4) externally_initialized global i32 0, align 4
+ at gptr3 = addrspace(3) externally_initialized global i32 0, align 4
+
+define amdgpu_kernel void @no_alias_addr_space_select(ptr addrspace(3) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_select(
+; CHECK-SAME: ptr addrspace(3) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[LPTR:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT: [[B:%.*]] = addrspacecast ptr addrspace(5) [[LPTR]] to ptr
+; CHECK-NEXT: [[C:%.*]] = addrspacecast ptr addrspace(3) [[SPTR]] to ptr
+; CHECK-NEXT: [[ADD_A:%.*]] = getelementptr inbounds i8, ptr addrspacecast (ptr addrspace(1) @gptr to ptr), i32 [[OFFSET]]
+; CHECK-NEXT: [[PTR:%.*]] = select i1 [[COND1]], ptr [[ADD_A]], ptr [[B]]
+; CHECK-NEXT: [[PTR2:%.*]] = select i1 [[COND2]], ptr [[PTR]], ptr [[C]]
+; CHECK-NEXT: store i32 [[VAL]], ptr [[PTR2]], align 4, !noalias.addrspace [[META0:![0-9]+]]
+; CHECK-NEXT: ret void
+;
+ %lptr = alloca i32, align 4, addrspace(5)
+ %a = addrspacecast ptr addrspace(1) @gptr to ptr
+ %b = addrspacecast ptr addrspace(5) %lptr to ptr
+ %c = addrspacecast ptr addrspace(3) %sptr to ptr
+ %add_a = getelementptr inbounds i8, ptr %a, i32 %offset
+ %ptr = select i1 %cond1, ptr %add_a, ptr %b
+ %ptr2 = select i1 %cond2, ptr %ptr, ptr %c
+ store i32 %val, ptr %ptr2
+ ret void
+}
+
+define amdgpu_kernel void @no_alias_addr_space_branch(ptr addrspace(3) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_branch(
+; CHECK-SAME: ptr addrspace(3) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br i1 [[COND1]], label %[[BB_1_TRUE:.*]], label %[[BB_1_FALSE:.*]]
+; CHECK: [[BB_1_TRUE]]:
+; CHECK-NEXT: [[A:%.*]] = addrspacecast ptr addrspace(1) @gptr to ptr
+; CHECK-NEXT: br label %[[BB_1_END:.*]]
+; CHECK: [[BB_1_FALSE]]:
+; CHECK-NEXT: [[LPTR:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT: [[B:%.*]] = addrspacecast ptr addrspace(5) [[LPTR]] to ptr
+; CHECK-NEXT: br label %[[BB_1_END]]
+; CHECK: [[BB_1_END]]:
+; CHECK-NEXT: [[PTR1:%.*]] = phi ptr [ [[A]], %[[BB_1_TRUE]] ], [ [[B]], %[[BB_1_FALSE]] ]
+; CHECK-NEXT: br i1 [[COND2]], label %[[BB_2_TRUE:.*]], label %[[BB_2_END:.*]]
+; CHECK: [[BB_2_TRUE]]:
+; CHECK-NEXT: [[C:%.*]] = addrspacecast ptr addrspace(3) [[SPTR]] to ptr
+; CHECK-NEXT: br label %[[BB_2_END]]
+; CHECK: [[BB_2_END]]:
+; CHECK-NEXT: [[PTR2:%.*]] = phi ptr [ [[PTR1]], %[[BB_1_END]] ], [ [[C]], %[[BB_2_TRUE]] ]
+; CHECK-NEXT: store i32 [[VAL]], ptr [[PTR2]], align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: ret void
+;
+ br i1 %cond1, label %bb.1.true, label %bb.1.false
+bb.1.true:
+ %a = addrspacecast ptr addrspace(1) @gptr to ptr
+ br label %bb.1.end
+
+bb.1.false:
+ %lptr = alloca i32, align 4, addrspace(5)
+ %b = addrspacecast ptr addrspace(5) %lptr to ptr
+ br label %bb.1.end
+
+bb.1.end:
+ %ptr1 = phi ptr [ %a, %bb.1.true ], [ %b, %bb.1.false ]
+ br i1 %cond2, label %bb.2.true, label %bb.2.end
+
+bb.2.true:
+ %c = addrspacecast ptr addrspace(3) %sptr to ptr
+ br label %bb.2.end
+
+bb.2.end:
+ %ptr2 = phi ptr [ %ptr1, %bb.1.end ], [ %c, %bb.2.true ]
+ store i32 %val, ptr %ptr2
+ ret void
+}
+
+define amdgpu_kernel void @no_alias_addr_space_select_cmpxchg(ptr addrspace(3) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_select_cmpxchg(
+; CHECK-SAME: ptr addrspace(3) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[LPTR:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT: [[B:%.*]] = addrspacecast ptr addrspace(5) [[LPTR]] to ptr
+; CHECK-NEXT: [[C:%.*]] = addrspacecast ptr addrspace(3) [[SPTR]] to ptr
+; CHECK-NEXT: [[ADD_A:%.*]] = getelementptr inbounds i8, ptr addrspacecast (ptr addrspace(1) @gptr to ptr), i32 [[OFFSET]]
+; CHECK-NEXT: [[PTR:%.*]] = select i1 [[COND1]], ptr [[ADD_A]], ptr [[B]]
+; CHECK-NEXT: [[PTR2:%.*]] = select i1 [[COND2]], ptr [[PTR]], ptr [[C]]
+; CHECK-NEXT: [[CMPXCHG_0:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 4 monotonic monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[CMPXCHG_1:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 5 acq_rel monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[CMPXCHG_2:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 6 acquire monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[CMPXCHG_3:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 7 release monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[CMPXCHG_4:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 8 seq_cst monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[CMPXCHG_5:%.*]] = cmpxchg weak ptr [[PTR2]], i32 0, i32 9 seq_cst monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[CMPXCHG_6:%.*]] = cmpxchg volatile ptr [[PTR2]], i32 0, i32 10 seq_cst monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[CMPXCHG_7:%.*]] = cmpxchg weak volatile ptr [[PTR2]], i32 0, i32 11 syncscope("singlethread") seq_cst monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: ret void
+;
+ %lptr = alloca i32, align 4, addrspace(5)
+ %a = addrspacecast ptr addrspace(1) @gptr to ptr
+ %b = addrspacecast ptr addrspace(5) %lptr to ptr
+ %c = addrspacecast ptr addrspace(3) %sptr to ptr
+ %add_a = getelementptr inbounds i8, ptr %a, i32 %offset
+ %ptr = select i1 %cond1, ptr %add_a, ptr %b
+ %ptr2 = select i1 %cond2, ptr %ptr, ptr %c
+ %cmpxchg.0 = cmpxchg ptr %ptr2, i32 0, i32 4 monotonic monotonic, align 4
+ %cmpxchg.1 = cmpxchg ptr %ptr2, i32 0, i32 5 acq_rel monotonic, align 4
+ %cmpxchg.2 = cmpxchg ptr %ptr2, i32 0, i32 6 acquire monotonic, align 4
+ %cmpxchg.3 = cmpxchg ptr %ptr2, i32 0, i32 7 release monotonic, align 4
+ %cmpxchg.4 = cmpxchg ptr %ptr2, i32 0, i32 8 seq_cst monotonic, align 4
+ %cmpxchg.5 = cmpxchg weak ptr %ptr2, i32 0, i32 9 seq_cst monotonic, align 4
+ %cmpxchg.6 = cmpxchg volatile ptr %ptr2, i32 0, i32 10 seq_cst monotonic, align 4
+ %cmpxchg.7 = cmpxchg weak volatile ptr %ptr2, i32 0, i32 11 syncscope("singlethread") seq_cst monotonic, align 4
+ ret void
+}
+
+define amdgpu_kernel void @no_alias_addr_space_branch_cmpxchg(ptr addrspace(3) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_branch_cmpxchg(
+; CHECK-SAME: ptr addrspace(3) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br i1 [[COND1]], label %[[BB_1_TRUE:.*]], label %[[BB_1_FALSE:.*]]
+; CHECK: [[BB_1_TRUE]]:
+; CHECK-NEXT: [[A:%.*]] = addrspacecast ptr addrspace(1) @gptr to ptr
+; CHECK-NEXT: br label %[[BB_1_END:.*]]
+; CHECK: [[BB_1_FALSE]]:
+; CHECK-NEXT: [[LPTR:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT: [[B:%.*]] = addrspacecast ptr addrspace(5) [[LPTR]] to ptr
+; CHECK-NEXT: br label %[[BB_1_END]]
+; CHECK: [[BB_1_END]]:
+; CHECK-NEXT: [[PTR1:%.*]] = phi ptr [ [[A]], %[[BB_1_TRUE]] ], [ [[B]], %[[BB_1_FALSE]] ]
+; CHECK-NEXT: br i1 [[COND2]], label %[[BB_2_TRUE:.*]], label %[[BB_2_END:.*]]
+; CHECK: [[BB_2_TRUE]]:
+; CHECK-NEXT: [[C:%.*]] = addrspacecast ptr addrspace(3) [[SPTR]] to ptr
+; CHECK-NEXT: br label %[[BB_2_END]]
+; CHECK: [[BB_2_END]]:
+; CHECK-NEXT: [[PTR2:%.*]] = phi ptr [ [[PTR1]], %[[BB_1_END]] ], [ [[C]], %[[BB_2_TRUE]] ]
+; CHECK-NEXT: [[CMPXCHG_0:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 4 monotonic monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[CMPXCHG_1:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 5 acq_rel monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[CMPXCHG_2:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 6 acquire monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[CMPXCHG_3:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 7 release monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[CMPXCHG_4:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 8 seq_cst monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[CMPXCHG_5:%.*]] = cmpxchg weak ptr [[PTR2]], i32 0, i32 9 seq_cst monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[CMPXCHG_6:%.*]] = cmpxchg volatile ptr [[PTR2]], i32 0, i32 10 seq_cst monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[CMPXCHG_7:%.*]] = cmpxchg weak volatile ptr [[PTR2]], i32 0, i32 11 syncscope("singlethread") seq_cst monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: ret void
+;
+ br i1 %cond1, label %bb.1.true, label %bb.1.false
+bb.1.true:
+ %a = addrspacecast ptr addrspace(1) @gptr to ptr
+ br label %bb.1.end
+
+bb.1.false:
+ %lptr = alloca i32, align 4, addrspace(5)
+ %b = addrspacecast ptr addrspace(5) %lptr to ptr
+ br label %bb.1.end
+
+bb.1.end:
+ %ptr1 = phi ptr [ %a, %bb.1.true ], [ %b, %bb.1.false ]
+ br i1 %cond2, label %bb.2.true, label %bb.2.end
+
+bb.2.true:
+ %c = addrspacecast ptr addrspace(3) %sptr to ptr
+ br label %bb.2.end
+
+bb.2.end:
+ %ptr2 = phi ptr [ %ptr1, %bb.1.end ], [ %c, %bb.2.true ]
+ %cmpxchg.0 = cmpxchg ptr %ptr2, i32 0, i32 4 monotonic monotonic, align 4
+ %cmpxchg.1 = cmpxchg ptr %ptr2, i32 0, i32 5 acq_rel monotonic, align 4
+ %cmpxchg.2 = cmpxchg ptr %ptr2, i32 0, i32 6 acquire monotonic, align 4
+ %cmpxchg.3 = cmpxchg ptr %ptr2, i32 0, i32 7 release monotonic, align 4
+ %cmpxchg.4 = cmpxchg ptr %ptr2, i32 0, i32 8 seq_cst monotonic, align 4
+ %cmpxchg.5 = cmpxchg weak ptr %ptr2, i32 0, i32 9 seq_cst monotonic, align 4
+ %cmpxchg.6 = cmpxchg volatile ptr %ptr2, i32 0, i32 10 seq_cst monotonic, align 4
+ %cmpxchg.7 = cmpxchg weak volatile ptr %ptr2, i32 0, i32 11 syncscope("singlethread") seq_cst monotonic, align 4
+ ret void
+}
+
+define amdgpu_kernel void @no_alias_addr_space_select_atomicrmw(ptr addrspace(3) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_select_atomicrmw(
+; CHECK-SAME: ptr addrspace(3) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[LPTR:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT: [[B:%.*]] = addrspacecast ptr addrspace(5) [[LPTR]] to ptr
+; CHECK-NEXT: [[C:%.*]] = addrspacecast ptr addrspace(3) [[SPTR]] to ptr
+; CHECK-NEXT: [[ADD_A:%.*]] = getelementptr inbounds i8, ptr addrspacecast (ptr addrspace(1) @gptr to ptr), i32 [[OFFSET]]
+; CHECK-NEXT: [[PTR:%.*]] = select i1 [[COND1]], ptr [[ADD_A]], ptr [[B]]
+; CHECK-NEXT: [[PTR2:%.*]] = select i1 [[COND2]], ptr [[PTR]], ptr [[C]]
+; CHECK-NEXT: [[ATOMICRMW_XCHG:%.*]] = atomicrmw xchg ptr [[PTR2]], i32 12 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[ATOMICRMW_ADD:%.*]] = atomicrmw add ptr [[PTR2]], i32 13 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[ATOMICRMW_SUB:%.*]] = atomicrmw sub ptr [[PTR2]], i32 14 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[ATOMICRMW_AND:%.*]] = atomicrmw and ptr [[PTR2]], i32 15 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[ATOMICRMW_NAND:%.*]] = atomicrmw nand ptr [[PTR2]], i32 16 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[ATOMICRMW_OR:%.*]] = atomicrmw or ptr [[PTR2]], i32 17 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[ATOMICRMW_XOR:%.*]] = atomicrmw xor ptr [[PTR2]], i32 18 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[ATOMICRMW_MAX:%.*]] = atomicrmw max ptr [[PTR2]], i32 19 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[ATOMICRMW_MIN:%.*]] = atomicrmw volatile min ptr [[PTR2]], i32 20 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[ATOMICRMW_UMAX:%.*]] = atomicrmw umax ptr [[PTR2]], i32 21 syncscope("singlethread") monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[ATOMICRMW_UMIN:%.*]] = atomicrmw volatile umin ptr [[PTR2]], i32 22 syncscope("singlethread") monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: ret void
+;
+ %lptr = alloca i32, align 4, addrspace(5)
+ %a = addrspacecast ptr addrspace(1) @gptr to ptr
+ %b = addrspacecast ptr addrspace(5) %lptr to ptr
+ %c = addrspacecast ptr addrspace(3) %sptr to ptr
+ %add_a = getelementptr inbounds i8, ptr %a, i32 %offset
+ %ptr = select i1 %cond1, ptr %add_a, ptr %b
+ %ptr2 = select i1 %cond2, ptr %ptr, ptr %c
+ %atomicrmw.xchg = atomicrmw xchg ptr %ptr2, i32 12 monotonic, align 4
+ %atomicrmw.add = atomicrmw add ptr %ptr2, i32 13 monotonic, align 4
+ %atomicrmw.sub = atomicrmw sub ptr %ptr2, i32 14 monotonic, align 4
+ %atomicrmw.and = atomicrmw and ptr %ptr2, i32 15 monotonic, align 4
+ %atomicrmw.nand = atomicrmw nand ptr %ptr2, i32 16 monotonic, align 4
+ %atomicrmw.or = atomicrmw or ptr %ptr2, i32 17 monotonic, align 4
+ %atomicrmw.xor = atomicrmw xor ptr %ptr2, i32 18 monotonic, align 4
+ %atomicrmw.max = atomicrmw max ptr %ptr2, i32 19 monotonic, align 4
+ %atomicrmw.min = atomicrmw volatile min ptr %ptr2, i32 20 monotonic, align 4
+ %atomicrmw.umax = atomicrmw umax ptr %ptr2, i32 21 syncscope("singlethread") monotonic, align 4
+ %atomicrmw.umin = atomicrmw volatile umin ptr %ptr2, i32 22 syncscope("singlethread") monotonic, align 4
+ ret void
+}
+
+define amdgpu_kernel void @no_alias_addr_space_branch_atomicrmw(ptr addrspace(3) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_branch_atomicrmw(
+; CHECK-SAME: ptr addrspace(3) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br i1 [[COND1]], label %[[BB_1_TRUE:.*]], label %[[BB_1_FALSE:.*]]
+; CHECK: [[BB_1_TRUE]]:
+; CHECK-NEXT: [[A:%.*]] = addrspacecast ptr addrspace(1) @gptr to ptr
+; CHECK-NEXT: br label %[[BB_1_END:.*]]
+; CHECK: [[BB_1_FALSE]]:
+; CHECK-NEXT: [[LPTR:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT: [[B:%.*]] = addrspacecast ptr addrspace(5) [[LPTR]] to ptr
+; CHECK-NEXT: br label %[[BB_1_END]]
+; CHECK: [[BB_1_END]]:
+; CHECK-NEXT: [[PTR1:%.*]] = phi ptr [ [[A]], %[[BB_1_TRUE]] ], [ [[B]], %[[BB_1_FALSE]] ]
+; CHECK-NEXT: br i1 [[COND2]], label %[[BB_2_TRUE:.*]], label %[[BB_2_END:.*]]
+; CHECK: [[BB_2_TRUE]]:
+; CHECK-NEXT: [[C:%.*]] = addrspacecast ptr addrspace(3) [[SPTR]] to ptr
+; CHECK-NEXT: br label %[[BB_2_END]]
+; CHECK: [[BB_2_END]]:
+; CHECK-NEXT: [[PTR2:%.*]] = phi ptr [ [[PTR1]], %[[BB_1_END]] ], [ [[C]], %[[BB_2_TRUE]] ]
+; CHECK-NEXT: [[ATOMICRMW_XCHG:%.*]] = atomicrmw xchg ptr [[PTR2]], i32 12 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[ATOMICRMW_ADD:%.*]] = atomicrmw add ptr [[PTR2]], i32 13 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[ATOMICRMW_SUB:%.*]] = atomicrmw sub ptr [[PTR2]], i32 14 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[ATOMICRMW_AND:%.*]] = atomicrmw and ptr [[PTR2]], i32 15 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[ATOMICRMW_NAND:%.*]] = atomicrmw nand ptr [[PTR2]], i32 16 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[ATOMICRMW_OR:%.*]] = atomicrmw or ptr [[PTR2]], i32 17 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[ATOMICRMW_XOR:%.*]] = atomicrmw xor ptr [[PTR2]], i32 18 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[ATOMICRMW_MAX:%.*]] = atomicrmw max ptr [[PTR2]], i32 19 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[ATOMICRMW_MIN:%.*]] = atomicrmw volatile min ptr [[PTR2]], i32 20 monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[ATOMICRMW_UMAX:%.*]] = atomicrmw umax ptr [[PTR2]], i32 21 syncscope("singlethread") monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: [[ATOMICRMW_UMIN:%.*]] = atomicrmw volatile umin ptr [[PTR2]], i32 22 syncscope("singlethread") monotonic, align 4, !noalias.addrspace [[META0]]
+; CHECK-NEXT: ret void
+;
+ br i1 %cond1, label %bb.1.true, label %bb.1.false
+bb.1.true:
+ %a = addrspacecast ptr addrspace(1) @gptr to ptr
+ br label %bb.1.end
+
+bb.1.false:
+ %lptr = alloca i32, align 4, addrspace(5)
+ %b = addrspacecast ptr addrspace(5) %lptr to ptr
+ br label %bb.1.end
+
+bb.1.end:
+ %ptr1 = phi ptr [ %a, %bb.1.true ], [ %b, %bb.1.false ]
+ br i1 %cond2, label %bb.2.true, label %bb.2.end
+
+bb.2.true:
+ %c = addrspacecast ptr addrspace(3) %sptr to ptr
+ br label %bb.2.end
+
+bb.2.end:
+ %ptr2 = phi ptr [ %ptr1, %bb.1.end ], [ %c, %bb.2.true ]
+ %atomicrmw.xchg = atomicrmw xchg ptr %ptr2, i32 12 monotonic, align 4
+ %atomicrmw.add = atomicrmw add ptr %ptr2, i32 13 monotonic, align 4
+ %atomicrmw.sub = atomicrmw sub ptr %ptr2, i32 14 monotonic, align 4
+ %atomicrmw.and = atomicrmw and ptr %ptr2, i32 15 monotonic, align 4
+ %atomicrmw.nand = atomicrmw nand ptr %ptr2, i32 16 monotonic, align 4
+ %atomicrmw.or = atomicrmw or ptr %ptr2, i32 17 monotonic, align 4
+ %atomicrmw.xor = atomicrmw xor ptr %ptr2, i32 18 monotonic, align 4
+ %atomicrmw.max = atomicrmw max ptr %ptr2, i32 19 monotonic, align 4
+ %atomicrmw.min = atomicrmw volatile min ptr %ptr2, i32 20 monotonic, align 4
+ %atomicrmw.umax = atomicrmw umax ptr %ptr2, i32 21 syncscope("singlethread") monotonic, align 4
+ %atomicrmw.umin = atomicrmw volatile umin ptr %ptr2, i32 22 syncscope("singlethread") monotonic, align 4
+ ret void
+}
+
+define amdgpu_kernel void @no_alias_addr_space_select_cmpxchg_flat(ptr %c, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_select_cmpxchg_flat(
+; CHECK-SAME: ptr [[C:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[LPTR:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT: [[B:%.*]] = addrspacecast ptr addrspace(5) [[LPTR]] to ptr
+; CHECK-NEXT: [[ADD_A:%.*]] = getelementptr inbounds i8, ptr addrspacecast (ptr addrspace(1) @gptr to ptr), i32 [[OFFSET]]
+; CHECK-NEXT: [[PTR:%.*]] = select i1 [[COND1]], ptr [[ADD_A]], ptr [[B]]
+; CHECK-NEXT: [[PTR2:%.*]] = select i1 [[COND2]], ptr [[PTR]], ptr [[C]]
+; CHECK-NEXT: [[CMPXCHG_0:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 4 monotonic monotonic, align 4
+; CHECK-NEXT: [[CMPXCHG_1:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 5 acq_rel monotonic, align 4
+; CHECK-NEXT: [[CMPXCHG_2:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 6 acquire monotonic, align 4
+; CHECK-NEXT: [[CMPXCHG_3:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 7 release monotonic, align 4
+; CHECK-NEXT: [[CMPXCHG_4:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 8 seq_cst monotonic, align 4
+; CHECK-NEXT: [[CMPXCHG_5:%.*]] = cmpxchg weak ptr [[PTR2]], i32 0, i32 9 seq_cst monotonic, align 4
+; CHECK-NEXT: [[CMPXCHG_6:%.*]] = cmpxchg volatile ptr [[PTR2]], i32 0, i32 10 seq_cst monotonic, align 4
+; CHECK-NEXT: [[CMPXCHG_7:%.*]] = cmpxchg weak volatile ptr [[PTR2]], i32 0, i32 11 syncscope("singlethread") seq_cst monotonic, align 4
+; CHECK-NEXT: ret void
+;
+ %lptr = alloca i32, align 4, addrspace(5)
+ %a = addrspacecast ptr addrspace(1) @gptr to ptr
+ %b = addrspacecast ptr addrspace(5) %lptr to ptr
+ %add_a = getelementptr inbounds i8, ptr %a, i32 %offset
+ %ptr = select i1 %cond1, ptr %add_a, ptr %b
+ %ptr2 = select i1 %cond2, ptr %ptr, ptr %c
+ %cmpxchg.0 = cmpxchg ptr %ptr2, i32 0, i32 4 monotonic monotonic, align 4
+ %cmpxchg.1 = cmpxchg ptr %ptr2, i32 0, i32 5 acq_rel monotonic, align 4
+ %cmpxchg.2 = cmpxchg ptr %ptr2, i32 0, i32 6 acquire monotonic, align 4
+ %cmpxchg.3 = cmpxchg ptr %ptr2, i32 0, i32 7 release monotonic, align 4
+ %cmpxchg.4 = cmpxchg ptr %ptr2, i32 0, i32 8 seq_cst monotonic, align 4
+ %cmpxchg.5 = cmpxchg weak ptr %ptr2, i32 0, i32 9 seq_cst monotonic, align 4
+ %cmpxchg.6 = cmpxchg volatile ptr %ptr2, i32 0, i32 10 seq_cst monotonic, align 4
+ %cmpxchg.7 = cmpxchg weak volatile ptr %ptr2, i32 0, i32 11 syncscope("singlethread") seq_cst monotonic, align 4
+ ret void
+}
+
+define amdgpu_kernel void @no_alias_addr_space_branch_cmpxchg_flat(ptr %c, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_branch_cmpxchg_flat(
+; CHECK-SAME: ptr [[C:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br i1 [[COND1]], label %[[BB_1_TRUE:.*]], label %[[BB_1_FALSE:.*]]
+; CHECK: [[BB_1_TRUE]]:
+; CHECK-NEXT: [[A:%.*]] = addrspacecast ptr addrspace(1) @gptr to ptr
+; CHECK-NEXT: br label %[[BB_1_END:.*]]
+; CHECK: [[BB_1_FALSE]]:
+; CHECK-NEXT: [[LPTR:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT: [[B:%.*]] = addrspacecast ptr addrspace(5) [[LPTR]] to ptr
+; CHECK-NEXT: br label %[[BB_1_END]]
+; CHECK: [[BB_1_END]]:
+; CHECK-NEXT: [[PTR1:%.*]] = phi ptr [ [[A]], %[[BB_1_TRUE]] ], [ [[B]], %[[BB_1_FALSE]] ]
+; CHECK-NEXT: br i1 [[COND2]], label %[[BB_2_TRUE:.*]], label %[[BB_2_END:.*]]
+; CHECK: [[BB_2_TRUE]]:
+; CHECK-NEXT: br label %[[BB_2_END]]
+; CHECK: [[BB_2_END]]:
+; CHECK-NEXT: [[PTR2:%.*]] = phi ptr [ [[PTR1]], %[[BB_1_END]] ], [ [[C]], %[[BB_2_TRUE]] ]
+; CHECK-NEXT: [[CMPXCHG_0:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 4 monotonic monotonic, align 4
+; CHECK-NEXT: [[CMPXCHG_1:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 5 acq_rel monotonic, align 4
+; CHECK-NEXT: [[CMPXCHG_2:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 6 acquire monotonic, align 4
+; CHECK-NEXT: [[CMPXCHG_3:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 7 release monotonic, align 4
+; CHECK-NEXT: [[CMPXCHG_4:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 8 seq_cst monotonic, align 4
+; CHECK-NEXT: [[CMPXCHG_5:%.*]] = cmpxchg weak ptr [[PTR2]], i32 0, i32 9 seq_cst monotonic, align 4
+; CHECK-NEXT: [[CMPXCHG_6:%.*]] = cmpxchg volatile ptr [[PTR2]], i32 0, i32 10 seq_cst monotonic, align 4
+; CHECK-NEXT: [[CMPXCHG_7:%.*]] = cmpxchg weak volatile ptr [[PTR2]], i32 0, i32 11 syncscope("singlethread") seq_cst monotonic, align 4
+; CHECK-NEXT: ret void
+;
+ br i1 %cond1, label %bb.1.true, label %bb.1.false
+bb.1.true:
+ %a = addrspacecast ptr addrspace(1) @gptr to ptr
+ br label %bb.1.end
+
+bb.1.false:
+ %lptr = alloca i32, align 4, addrspace(5)
+ %b = addrspacecast ptr addrspace(5) %lptr to ptr
+ br label %bb.1.end
+
+bb.1.end:
+ %ptr1 = phi ptr [ %a, %bb.1.true ], [ %b, %bb.1.false ]
+ br i1 %cond2, label %bb.2.true, label %bb.2.end
+
+bb.2.true:
+ br label %bb.2.end
+
+bb.2.end:
+ %ptr2 = phi ptr [ %ptr1, %bb.1.end ], [ %c, %bb.2.true ]
+ %cmpxchg.0 = cmpxchg ptr %ptr2, i32 0, i32 4 monotonic monotonic, align 4
+ %cmpxchg.1 = cmpxchg ptr %ptr2, i32 0, i32 5 acq_rel monotonic, align 4
+ %cmpxchg.2 = cmpxchg ptr %ptr2, i32 0, i32 6 acquire monotonic, align 4
+ %cmpxchg.3 = cmpxchg ptr %ptr2, i32 0, i32 7 release monotonic, align 4
+ %cmpxchg.4 = cmpxchg ptr %ptr2, i32 0, i32 8 seq_cst monotonic, align 4
+ %cmpxchg.5 = cmpxchg weak ptr %ptr2, i32 0, i32 9 seq_cst monotonic, align 4
+ %cmpxchg.6 = cmpxchg volatile ptr %ptr2, i32 0, i32 10 seq_cst monotonic, align 4
+ %cmpxchg.7 = cmpxchg weak volatile ptr %ptr2, i32 0, i32 11 syncscope("singlethread") seq_cst monotonic, align 4
+ ret void
+}
+
+define amdgpu_kernel void @no_alias_addr_space_select_atomicrmw_flat(ptr %c, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_select_atomicrmw_flat(
+; CHECK-SAME: ptr [[C:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[LPTR:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT: [[B:%.*]] = addrspacecast ptr addrspace(5) [[LPTR]] to ptr
+; CHECK-NEXT: [[ADD_A:%.*]] = getelementptr inbounds i8, ptr addrspacecast (ptr addrspace(1) @gptr to ptr), i32 [[OFFSET]]
+; CHECK-NEXT: [[PTR:%.*]] = select i1 [[COND1]], ptr [[ADD_A]], ptr [[B]]
+; CHECK-NEXT: [[PTR2:%.*]] = select i1 [[COND2]], ptr [[PTR]], ptr [[C]]
+; CHECK-NEXT: [[ATOMICRMW_XCHG:%.*]] = atomicrmw xchg ptr [[PTR2]], i32 12 monotonic, align 4
+; CHECK-NEXT: [[ATOMICRMW_ADD:%.*]] = atomicrmw add ptr [[PTR2]], i32 13 monotonic, align 4
+; CHECK-NEXT: [[ATOMICRMW_SUB:%.*]] = atomicrmw sub ptr [[PTR2]], i32 14 monotonic, align 4
+; CHECK-NEXT: [[ATOMICRMW_AND:%.*]] = atomicrmw and ptr [[PTR2]], i32 15 monotonic, align 4
+; CHECK-NEXT: [[ATOMICRMW_NAND:%.*]] = atomicrmw nand ptr [[PTR2]], i32 16 monotonic, align 4
+; CHECK-NEXT: [[ATOMICRMW_OR:%.*]] = atomicrmw or ptr [[PTR2]], i32 17 monotonic, align 4
+; CHECK-NEXT: [[ATOMICRMW_XOR:%.*]] = atomicrmw xor ptr [[PTR2]], i32 18 monotonic, align 4
+; CHECK-NEXT: [[ATOMICRMW_MAX:%.*]] = atomicrmw max ptr [[PTR2]], i32 19 monotonic, align 4
+; CHECK-NEXT: [[ATOMICRMW_MIN:%.*]] = atomicrmw volatile min ptr [[PTR2]], i32 20 monotonic, align 4
+; CHECK-NEXT: [[ATOMICRMW_UMAX:%.*]] = atomicrmw umax ptr [[PTR2]], i32 21 syncscope("singlethread") monotonic, align 4
+; CHECK-NEXT: [[ATOMICRMW_UMIN:%.*]] = atomicrmw volatile umin ptr [[PTR2]], i32 22 syncscope("singlethread") monotonic, align 4
+; CHECK-NEXT: ret void
+;
+ %lptr = alloca i32, align 4, addrspace(5)
+ %a = addrspacecast ptr addrspace(1) @gptr to ptr
+ %b = addrspacecast ptr addrspace(5) %lptr to ptr
+ %add_a = getelementptr inbounds i8, ptr %a, i32 %offset
+ %ptr = select i1 %cond1, ptr %add_a, ptr %b
+ %ptr2 = select i1 %cond2, ptr %ptr, ptr %c
+ %atomicrmw.xchg = atomicrmw xchg ptr %ptr2, i32 12 monotonic, align 4
+ %atomicrmw.add = atomicrmw add ptr %ptr2, i32 13 monotonic, align 4
+ %atomicrmw.sub = atomicrmw sub ptr %ptr2, i32 14 monotonic, align 4
+ %atomicrmw.and = atomicrmw and ptr %ptr2, i32 15 monotonic, align 4
+ %atomicrmw.nand = atomicrmw nand ptr %ptr2, i32 16 monotonic, align 4
+ %atomicrmw.or = atomicrmw or ptr %ptr2, i32 17 monotonic, align 4
+ %atomicrmw.xor = atomicrmw xor ptr %ptr2, i32 18 monotonic, align 4
+ %atomicrmw.max = atomicrmw max ptr %ptr2, i32 19 monotonic, align 4
+ %atomicrmw.min = atomicrmw volatile min ptr %ptr2, i32 20 monotonic, align 4
+ %atomicrmw.umax = atomicrmw umax ptr %ptr2, i32 21 syncscope("singlethread") monotonic, align 4
+ %atomicrmw.umin = atomicrmw volatile umin ptr %ptr2, i32 22 syncscope("singlethread") monotonic, align 4
+ ret void
+}
+
+define amdgpu_kernel void @no_alias_addr_space_branch_atomicrmw_flat(ptr %c, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_branch_atomicrmw_flat(
+; CHECK-SAME: ptr [[C:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br i1 [[COND1]], label %[[BB_1_TRUE:.*]], label %[[BB_1_FALSE:.*]]
+; CHECK: [[BB_1_TRUE]]:
+; CHECK-NEXT: [[A:%.*]] = addrspacecast ptr addrspace(1) @gptr to ptr
+; CHECK-NEXT: br label %[[BB_1_END:.*]]
+; CHECK: [[BB_1_FALSE]]:
+; CHECK-NEXT: [[LPTR:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT: [[B:%.*]] = addrspacecast ptr addrspace(5) [[LPTR]] to ptr
+; CHECK-NEXT: br label %[[BB_1_END]]
+; CHECK: [[BB_1_END]]:
+; CHECK-NEXT: [[PTR1:%.*]] = phi ptr [ [[A]], %[[BB_1_TRUE]] ], [ [[B]], %[[BB_1_FALSE]] ]
+; CHECK-NEXT: br i1 [[COND2]], label %[[BB_2_TRUE:.*]], label %[[BB_2_END:.*]]
+; CHECK: [[BB_2_TRUE]]:
+; CHECK-NEXT: br label %[[BB_2_END]]
+; CHECK: [[BB_2_END]]:
+; CHECK-NEXT: [[PTR2:%.*]] = phi ptr [ [[PTR1]], %[[BB_1_END]] ], [ [[C]], %[[BB_2_TRUE]] ]
+; CHECK-NEXT: [[ATOMICRMW_XCHG:%.*]] = atomicrmw xchg ptr [[PTR2]], i32 12 monotonic, align 4
+; CHECK-NEXT: [[ATOMICRMW_ADD:%.*]] = atomicrmw add ptr [[PTR2]], i32 13 monotonic, align 4
+; CHECK-NEXT: [[ATOMICRMW_SUB:%.*]] = atomicrmw sub ptr [[PTR2]], i32 14 monotonic, align 4
+; CHECK-NEXT: [[ATOMICRMW_AND:%.*]] = atomicrmw and ptr [[PTR2]], i32 15 monotonic, align 4
+; CHECK-NEXT: [[ATOMICRMW_NAND:%.*]] = atomicrmw nand ptr [[PTR2]], i32 16 monotonic, align 4
+; CHECK-NEXT: [[ATOMICRMW_OR:%.*]] = atomicrmw or ptr [[PTR2]], i32 17 monotonic, align 4
+; CHECK-NEXT: [[ATOMICRMW_XOR:%.*]] = atomicrmw xor ptr [[PTR2]], i32 18 monotonic, align 4
+; CHECK-NEXT: [[ATOMICRMW_MAX:%.*]] = atomicrmw max ptr [[PTR2]], i32 19 monotonic, align 4
+; CHECK-NEXT: [[ATOMICRMW_MIN:%.*]] = atomicrmw volatile min ptr [[PTR2]], i32 20 monotonic, align 4
+; CHECK-NEXT: [[ATOMICRMW_UMAX:%.*]] = atomicrmw umax ptr [[PTR2]], i32 21 syncscope("singlethread") monotonic, align 4
+; CHECK-NEXT: [[ATOMICRMW_UMIN:%.*]] = atomicrmw volatile umin ptr [[PTR2]], i32 22 syncscope("singlethread") monotonic, align 4
+; CHECK-NEXT: ret void
+;
+ br i1 %cond1, label %bb.1.true, label %bb.1.false
+bb.1.true:
+ %a = addrspacecast ptr addrspace(1) @gptr to ptr
+ br label %bb.1.end
+
+bb.1.false:
+ %lptr = alloca i32, align 4, addrspace(5)
+ %b = addrspacecast ptr addrspace(5) %lptr to ptr
+ br label %bb.1.end
+
+bb.1.end:
+ %ptr1 = phi ptr [ %a, %bb.1.true ], [ %b, %bb.1.false ]
+ br i1 %cond2, label %bb.2.true, label %bb.2.end
+
+bb.2.true:
+ br label %bb.2.end
+
+bb.2.end:
+ %ptr2 = phi ptr [ %ptr1, %bb.1.end ], [ %c, %bb.2.true ]
+ %atomicrmw.xchg = atomicrmw xchg ptr %ptr2, i32 12 monotonic, align 4
+ %atomicrmw.add = atomicrmw add ptr %ptr2, i32 13 monotonic, align 4
+ %atomicrmw.sub = atomicrmw sub ptr %ptr2, i32 14 monotonic, align 4
+ %atomicrmw.and = atomicrmw and ptr %ptr2, i32 15 monotonic, align 4
+ %atomicrmw.nand = atomicrmw nand ptr %ptr2, i32 16 monotonic, align 4
+ %atomicrmw.or = atomicrmw or ptr %ptr2, i32 17 monotonic, align 4
+ %atomicrmw.xor = atomicrmw xor ptr %ptr2, i32 18 monotonic, align 4
+ %atomicrmw.max = atomicrmw max ptr %ptr2, i32 19 monotonic, align 4
+ %atomicrmw.min = atomicrmw volatile min ptr %ptr2, i32 20 monotonic, align 4
+ %atomicrmw.umax = atomicrmw umax ptr %ptr2, i32 21 syncscope("singlethread") monotonic, align 4
+ %atomicrmw.umin = atomicrmw volatile umin ptr %ptr2, i32 22 syncscope("singlethread") monotonic, align 4
+ ret void
+}
+
+define internal void @callee_no_alias_addr_space_select(ptr %ptr1, ptr %ptr2, ptr %ptr3, i1 %cond1, i1 %cond2, i32 %val) #0 {
+; CHECK-LABEL: define internal void @callee_no_alias_addr_space_select(
+; CHECK-SAME: ptr [[PTR1:%.*]], ptr [[PTR2:%.*]], ptr [[PTR3:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[PTR4:%.*]] = select i1 [[COND1]], ptr addrspacecast (ptr addrspace(1) @gptr to ptr), ptr addrspacecast (ptr addrspace(4) @gptr2 to ptr)
+; CHECK-NEXT: [[PTR5:%.*]] = select i1 [[COND2]], ptr [[PTR4]], ptr addrspacecast (ptr addrspace(3) @gptr3 to ptr)
+; CHECK-NEXT: store i32 [[VAL]], ptr [[PTR5]], align 4, !noalias.addrspace [[META1:![0-9]+]]
+; CHECK-NEXT: [[ATOMICRMW_XCHG:%.*]] = atomicrmw xchg ptr [[PTR5]], i32 12 monotonic, align 4, !noalias.addrspace [[META1]]
+; CHECK-NEXT: [[ATOMICRMW_ADD:%.*]] = atomicrmw add ptr [[PTR5]], i32 13 monotonic, align 4, !noalias.addrspace [[META1]]
+; CHECK-NEXT: [[ATOMICRMW_SUB:%.*]] = atomicrmw sub ptr [[PTR5]], i32 14 monotonic, align 4, !noalias.addrspace [[META1]]
+; CHECK-NEXT: [[ATOMICRMW_AND:%.*]] = atomicrmw and ptr [[PTR5]], i32 15 monotonic, align 4, !noalias.addrspace [[META1]]
+; CHECK-NEXT: [[ATOMICRMW_NAND:%.*]] = atomicrmw nand ptr [[PTR5]], i32 16 monotonic, align 4, !noalias.addrspace [[META1]]
+; CHECK-NEXT: [[ATOMICRMW_OR:%.*]] = atomicrmw or ptr [[PTR5]], i32 17 monotonic, align 4, !noalias.addrspace [[META1]]
+; CHECK-NEXT: [[ATOMICRMW_XOR:%.*]] = atomicrmw xor ptr [[PTR5]], i32 18 monotonic, align 4, !noalias.addrspace [[META1]]
+; CHECK-NEXT: [[ATOMICRMW_MAX:%.*]] = atomicrmw max ptr [[PTR5]], i32 19 monotonic, align 4, !noalias.addrspace [[META1]]
+; CHECK-NEXT: [[ATOMICRMW_MIN:%.*]] = atomicrmw volatile min ptr [[PTR5]], i32 20 monotonic, align 4, !noalias.addrspace [[META1]]
+; CHECK-NEXT: [[ATOMICRMW_UMAX:%.*]] = atomicrmw umax ptr [[PTR5]], i32 21 syncscope("singlethread") monotonic, align 4, !noalias.addrspace [[META1]]
+; CHECK-NEXT: [[ATOMICRMW_UMIN:%.*]] = atomicrmw volatile umin ptr [[PTR5]], i32 22 syncscope("singlethread") monotonic, align 4, !noalias.addrspace [[META1]]
+; CHECK-NEXT: ret void
+;
+ %ptr4 = select i1 %cond1, ptr %ptr1, ptr %ptr2
+ %ptr5 = select i1 %cond2, ptr %ptr4, ptr %ptr3
+ store i32 %val, ptr %ptr5
+ %atomicrmw.xchg = atomicrmw xchg ptr %ptr5, i32 12 monotonic, align 4
+ %atomicrmw.add = atomicrmw add ptr %ptr5, i32 13 monotonic, align 4
+ %atomicrmw.sub = atomicrmw sub ptr %ptr5, i32 14 monotonic, align 4
+ %atomicrmw.and = atomicrmw and ptr %ptr5, i32 15 monotonic, align 4
+ %atomicrmw.nand = atomicrmw nand ptr %ptr5, i32 16 monotonic, align 4
+ %atomicrmw.or = atomicrmw or ptr %ptr5, i32 17 monotonic, align 4
+ %atomicrmw.xor = atomicrmw xor ptr %ptr5, i32 18 monotonic, align 4
+ %atomicrmw.max = atomicrmw max ptr %ptr5, i32 19 monotonic, align 4
+ %atomicrmw.min = atomicrmw volatile min ptr %ptr5, i32 20 monotonic, align 4
+ %atomicrmw.umax = atomicrmw umax ptr %ptr5, i32 21 syncscope("singlethread") monotonic, align 4
+ %atomicrmw.umin = atomicrmw volatile umin ptr %ptr5, i32 22 syncscope("singlethread") monotonic, align 4
+ ret void
+}
+
+define internal void @callee_alias_addr_space_branch(ptr %ptr1, ptr %ptr2, ptr %ptr3, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+; CHECK-LABEL: define internal void @callee_alias_addr_space_branch(
+; CHECK-SAME: ptr [[PTR1:%.*]], ptr [[PTR2:%.*]], ptr [[PTR3:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: br i1 [[COND1]], label %[[BB_1_TRUE:.*]], label %[[BB_1_FALSE:.*]]
+; CHECK: [[BB_1_TRUE]]:
+; CHECK-NEXT: br label %[[BB_1_END:.*]]
+; CHECK: [[BB_1_FALSE]]:
+; CHECK-NEXT: br label %[[BB_1_END]]
+; CHECK: [[BB_1_END]]:
+; CHECK-NEXT: [[PTR4:%.*]] = phi ptr [ addrspacecast (ptr addrspace(1) @gptr to ptr), %[[BB_1_TRUE]] ], [ addrspacecast (ptr addrspace(4) @gptr2 to ptr), %[[BB_1_FALSE]] ]
+; CHECK-NEXT: br i1 [[COND2]], label %[[BB_2_TRUE:.*]], label %[[BB_2_END:.*]]
+; CHECK: [[BB_2_TRUE]]:
+; CHECK-NEXT: br label %[[BB_2_END]]
+; CHECK: [[BB_2_END]]:
+; CHECK-NEXT: [[PTR5:%.*]] = phi ptr [ [[PTR4]], %[[BB_1_END]] ], [ addrspacecast (ptr addrspace(3) @gptr3 to ptr), %[[BB_2_TRUE]] ]
+; CHECK-NEXT: store i32 [[VAL]], ptr [[PTR5]], align 4, !noalias.addrspace [[META1]]
+; CHECK-NEXT: [[ATOMICRMW_XCHG:%.*]] = atomicrmw xchg ptr [[PTR5]], i32 12 monotonic, align 4, !noalias.addrspace [[META1]]
+; CHECK-NEXT: [[ATOMICRMW_ADD:%.*]] = atomicrmw add ptr [[PTR5]], i32 13 monotonic, align 4, !noalias.addrspace [[META1]]
+; CHECK-NEXT: [[ATOMICRMW_SUB:%.*]] = atomicrmw sub ptr [[PTR5]], i32 14 monotonic, align 4, !noalias.addrspace [[META1]]
+; CHECK-NEXT: [[ATOMICRMW_AND:%.*]] = atomicrmw and ptr [[PTR5]], i32 15 monotonic, align 4, !noalias.addrspace [[META1]]
+; CHECK-NEXT: [[ATOMICRMW_NAND:%.*]] = atomicrmw nand ptr [[PTR5]], i32 16 monotonic, align 4, !noalias.addrspace [[META1]]
+; CHECK-NEXT: [[ATOMICRMW_OR:%.*]] = atomicrmw or ptr [[PTR5]], i32 17 monotonic, align 4, !noalias.addrspace [[META1]]
+; CHECK-NEXT: [[ATOMICRMW_XOR:%.*]] = atomicrmw xor ptr [[PTR5]], i32 18 monotonic, align 4, !noalias.addrspace [[META1]]
+; CHECK-NEXT: [[ATOMICRMW_MAX:%.*]] = atomicrmw max ptr [[PTR5]], i32 19 monotonic, align 4, !noalias.addrspace [[META1]]
+; CHECK-NEXT: [[ATOMICRMW_MIN:%.*]] = atomicrmw volatile min ptr [[PTR5]], i32 20 monotonic, align 4, !noalias.addrspace [[META1]]
+; CHECK-NEXT: [[ATOMICRMW_UMAX:%.*]] = atomicrmw umax ptr [[PTR5]], i32 21 syncscope("singlethread") monotonic, align 4, !noalias.addrspace [[META1]]
+; CHECK-NEXT: [[ATOMICRMW_UMIN:%.*]] = atomicrmw volatile umin ptr [[PTR5]], i32 22 syncscope("singlethread") monotonic, align 4, !noalias.addrspace [[META1]]
+; CHECK-NEXT: ret void
+;
+ br i1 %cond1, label %bb.1.true, label %bb.1.false
+bb.1.true:
+ br label %bb.1.end
+
+bb.1.false:
+ br label %bb.1.end
+
+bb.1.end:
+ %ptr4 = phi ptr [ %ptr1, %bb.1.true ], [ %ptr2, %bb.1.false ]
+ br i1 %cond2, label %bb.2.true, label %bb.2.end
+
+bb.2.true:
+ br label %bb.2.end
+
+bb.2.end:
+ %ptr5 = phi ptr [ %ptr4, %bb.1.end ], [ %ptr3, %bb.2.true ]
+ store i32 %val, ptr %ptr5
+ %atomicrmw.xchg = atomicrmw xchg ptr %ptr5, i32 12 monotonic, align 4
+ %atomicrmw.add = atomicrmw add ptr %ptr5, i32 13 monotonic, align 4
+ %atomicrmw.sub = atomicrmw sub ptr %ptr5, i32 14 monotonic, align 4
+ %atomicrmw.and = atomicrmw and ptr %ptr5, i32 15 monotonic, align 4
+ %atomicrmw.nand = atomicrmw nand ptr %ptr5, i32 16 monotonic, align 4
+ %atomicrmw.or = atomicrmw or ptr %ptr5, i32 17 monotonic, align 4
+ %atomicrmw.xor = atomicrmw xor ptr %ptr5, i32 18 monotonic, align 4
+ %atomicrmw.max = atomicrmw max ptr %ptr5, i32 19 monotonic, align 4
+ %atomicrmw.min = atomicrmw volatile min ptr %ptr5, i32 20 monotonic, align 4
+ %atomicrmw.umax = atomicrmw umax ptr %ptr5, i32 21 syncscope("singlethread") monotonic, align 4
+ %atomicrmw.umin = atomicrmw volatile umin ptr %ptr5, i32 22 syncscope("singlethread") monotonic, align 4
+ ret void
+}
+
+define amdgpu_kernel void @kernal_call_func(i1 %cond1, i1 %cond2, i32 %val) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @kernal_call_func(
+; CHECK-SAME: i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT: [[LPTR:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT: call void @callee_no_alias_addr_space_select(ptr addrspacecast (ptr addrspace(1) @gptr to ptr), ptr addrspacecast (ptr addrspace(4) @gptr2 to ptr), ptr addrspacecast (ptr addrspace(3) @gptr3 to ptr), i1 [[COND1]], i1 [[COND2]], i32 [[VAL]])
+; CHECK-NEXT: call void @callee_alias_addr_space_branch(ptr addrspacecast (ptr addrspace(1) @gptr to ptr), ptr addrspacecast (ptr addrspace(4) @gptr2 to ptr), ptr addrspacecast (ptr addrspace(3) @gptr3 to ptr), i1 [[COND1]], i1 [[COND2]], i32 [[VAL]])
+; CHECK-NEXT: ret void
+;
+ %lptr = alloca i32, align 4, addrspace(5)
+ %a = addrspacecast ptr addrspace(1) @gptr to ptr
+ %b = addrspacecast ptr addrspace(4) @gptr2 to ptr
+ %c = addrspacecast ptr addrspace(3) @gptr3 to ptr
+ call void @callee_no_alias_addr_space_select(ptr %a, ptr %b, ptr %c, i1 %cond1, i1 %cond2, i32 %val)
+ call void @callee_alias_addr_space_branch(ptr %a, ptr %b, ptr %c, i1 %cond1, i1 %cond2, i32 %val)
+ ret void
+}
+
+define amdgpu_kernel void @no_alias_addr_space_unhandled(ptr addrspace(3) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_unhandled(
+; CHECK-SAME: ptr addrspace(3) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[LPTR:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(3) [[SPTR]], align 4
+; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(5) [[LPTR]], align 4
+; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) @gptr, align 4
+; CHECK-NEXT: ret void
+;
+ %lptr = alloca i32, align 4, addrspace(5)
+ store i32 %val, ptr addrspace(3) %sptr
+ store i32 %val, ptr addrspace(5) %lptr
+ store i32 %val, ptr addrspace(1) @gptr
+ ret void
+}
+
+define amdgpu_kernel void @no_alias_addr_space_has_meta(ptr addrspace(3) %sptr, i1 %cond1, i1 %cond2, i32 %val, i32 %offset) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_has_meta(
+; CHECK-SAME: ptr addrspace(3) [[SPTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[LPTR:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT: [[B:%.*]] = addrspacecast ptr addrspace(5) [[LPTR]] to ptr
+; CHECK-NEXT: [[C:%.*]] = addrspacecast ptr addrspace(3) [[SPTR]] to ptr
+; CHECK-NEXT: [[ADD_A:%.*]] = getelementptr inbounds i8, ptr addrspacecast (ptr addrspace(1) @gptr to ptr), i32 [[OFFSET]]
+; CHECK-NEXT: [[PTR:%.*]] = select i1 [[COND1]], ptr [[ADD_A]], ptr [[B]]
+; CHECK-NEXT: [[PTR2:%.*]] = select i1 [[COND2]], ptr [[PTR]], ptr [[C]]
+; CHECK-NEXT: store i32 [[VAL]], ptr [[PTR2]], align 4, !noalias.addrspace [[META2:![0-9]+]]
+; CHECK-NEXT: ret void
+;
+ %lptr = alloca i32, align 4, addrspace(5)
+ %a = addrspacecast ptr addrspace(1) @gptr to ptr
+ %b = addrspacecast ptr addrspace(5) %lptr to ptr
+ %c = addrspacecast ptr addrspace(3) %sptr to ptr
+ %add_a = getelementptr inbounds i8, ptr %a, i32 %offset
+ %ptr = select i1 %cond1, ptr %add_a, ptr %b
+ %ptr2 = select i1 %cond2, ptr %ptr, ptr %c
+ store i32 %val, ptr %ptr2, !noalias.addrspace !0
+ ret void
+}
+
+!0 = !{i32 2, i32 3, i32 4, i32 10}
+
+;.
+; CHECK: [[META0]] = !{i32 2, i32 3, i32 4, i32 5, i32 6, i32 10}
+; CHECK: [[META1]] = !{i32 2, i32 3, i32 5, i32 10}
+; CHECK: [[META2]] = !{i32 2, i32 3, i32 4, i32 10}
+;.