[llvm] [InferAddressSpaces] Support address space inference from load values (PR #171019)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Dec 7 00:07:49 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
Author: actink (actinks)
Changes:
Extend the generic InferAddressSpaces pass to infer a pointer's address space from the value produced by a load instruction:
- Assume that pointers whose defining access is MemorySSA's LiveOnEntryDef reside in the global address space.
- Collect store instructions whose value operand is a flat-address-space pointer, to improve inference of shared/local memory address spaces.
- Use MemorySSA to analyze clobber relationships and propagate address space information from clobbering stores, or from the LiveOnEntryDef assumption when no clobbering store exists.

This enhancement is target-independent and now serves as the unified address space inference mechanism for backends such as AMDGPU and NVPTX. The AMDGPUPromoteKernelArguments pass no longer performs address space inference; it retains only the functionality of attaching "amdgpu.noclobber" metadata to load instructions. (An IR sketch of the LiveOnEntryDef case follows; a second sketch of the store-clobber case appears after the diff.)
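To make the LiveOnEntryDef case concrete, here is a minimal, hypothetical LLVM IR sketch (not taken from the patch's tests; the function and value names are invented). No store in the kernel can clobber the load, so MemorySSA reports its defining access as the LiveOnEntryDef, and the loaded generic pointer is assumed to be global (addrspace(1) on NVPTX):

```llvm
define ptx_kernel void @liveonentry_example(ptr %table) {
entry:
  ; The defining access of this load is MemorySSA's LiveOnEntryDef, so the
  ; pass assumes the loaded generic pointer points to global memory.
  %p = load ptr, ptr %table, align 8
  ; Expected effect (sketch): an addrspacecast of %p to ptr addrspace(1) is
  ; inserted after the load, and this store is rewritten to use it.
  store float 0.0, ptr %p, align 4
  ret void
}
```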
---
Patch is 41.80 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171019.diff
16 Files Affected:
- (modified) llvm/include/llvm/Analysis/TargetTransformInfo.h (+6)
- (modified) llvm/include/llvm/Analysis/TargetTransformInfoImpl.h (+6)
- (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+6)
- (modified) llvm/include/llvm/Target/TargetMachine.h (+8)
- (modified) llvm/lib/Analysis/TargetTransformInfo.cpp (+9)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp (+1-36)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+28-3)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h (+2)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (+22)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h (+2)
- (modified) llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp (+48)
- (modified) llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h (+5)
- (modified) llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp (+153-7)
- (modified) llvm/test/CodeGen/AMDGPU/promote-kernel-arguments.ll (+1-1)
- (modified) llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll (+3-1)
- (added) llvm/test/Transforms/InferAddressSpaces/NVPTX/load-ptr.ll (+247)
``````````diff
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 99525607f744a..efb352018fbe4 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -563,6 +563,8 @@ class TargetTransformInfo {
LLVM_ABI unsigned getAssumedAddrSpace(const Value *V) const;
+ LLVM_ABI unsigned getAssumedLiveOnEntryDefAddrSpace(const Value *V) const;
+
LLVM_ABI bool isSingleThreaded() const;
LLVM_ABI std::pair<const Value *, unsigned>
@@ -577,6 +579,10 @@ class TargetTransformInfo {
Value *OldV,
Value *NewV) const;
+ /// Return true if \p IID only performs an artificial clobber to facilitate
+ /// ordering constraints.
+ LLVM_ABI bool isArtificialClobber(Intrinsic::ID IID) const;
+
/// Test whether calls to a function lower to actual program function
/// calls.
///
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 835eb7701ccfa..0130b5225ce3f 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -158,6 +158,10 @@ class TargetTransformInfoImplBase {
virtual unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
+ virtual unsigned getAssumedLiveOnEntryDefAddrSpace(const Value *V) const {
+ return -1;
+ }
+
virtual bool isSingleThreaded() const { return false; }
virtual std::pair<const Value *, unsigned>
@@ -171,6 +175,8 @@ class TargetTransformInfoImplBase {
return nullptr;
}
+ virtual bool isArtificialClobber(Intrinsic::ID IID) const { return false; }
+
virtual bool isLoweredToCall(const Function *F) const {
assert(F && "A concrete function must be provided to this routine.");
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 494199835a19c..10708245b1180 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -440,6 +440,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return getTLI()->getTargetMachine().getAssumedAddrSpace(V);
}
+ unsigned getAssumedLiveOnEntryDefAddrSpace(const Value *V) const override {
+ return getTLI()->getTargetMachine().getAssumedLiveOnEntryDefAddrSpace(V);
+ }
+
bool isSingleThreaded() const override {
return getTLI()->getTargetMachine().Options.ThreadModel ==
ThreadModel::Single;
@@ -455,6 +459,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return nullptr;
}
+ bool isArtificialClobber(Intrinsic::ID IID) const override { return false; }
+
bool isLegalAddImmediate(int64_t imm) const override {
return getTLI()->isLegalAddImmediate(imm);
}
diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
index d0fd483a8ddaa..03e0b43686cd4 100644
--- a/llvm/include/llvm/Target/TargetMachine.h
+++ b/llvm/include/llvm/Target/TargetMachine.h
@@ -378,6 +378,14 @@ class LLVM_ABI TargetMachine {
/// properties.
virtual unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
+ /// LiveOnEntryDef follows MemorySSA's concept of the same name.
+ /// Loads and stores through pointer arguments and other global values may be
+ /// defined by memory operations that do not occur in the current function.
+ /// Return the assumed address space for such memory operations.
+ virtual unsigned getAssumedLiveOnEntryDefAddrSpace(const Value *V) const {
+ return -1;
+ }
+
/// If the specified predicate checks whether a generic pointer falls within
/// a specified address space, return that generic pointer and the address
/// space being queried.
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index c529d87502acd..d943c2171d6a8 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -339,6 +339,11 @@ unsigned TargetTransformInfo::getAssumedAddrSpace(const Value *V) const {
return TTIImpl->getAssumedAddrSpace(V);
}
+unsigned
+TargetTransformInfo::getAssumedLiveOnEntryDefAddrSpace(const Value *V) const {
+ return TTIImpl->getAssumedLiveOnEntryDefAddrSpace(V);
+}
+
bool TargetTransformInfo::isSingleThreaded() const {
return TTIImpl->isSingleThreaded();
}
@@ -353,6 +358,10 @@ Value *TargetTransformInfo::rewriteIntrinsicWithAddressSpace(
return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
}
+bool TargetTransformInfo::isArtificialClobber(Intrinsic::ID IID) const {
+ return TTIImpl->isArtificialClobber(IID);
+}
+
bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
return TTIImpl->isLoweredToCall(F);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp
index 06819d05b4be6..8ec9a1b15e6a5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp
@@ -34,8 +34,6 @@ class AMDGPUPromoteKernelArguments : public FunctionPass {
AliasAnalysis *AA;
- Instruction *ArgCastInsertPt;
-
SmallVector<Value *> Ptrs;
void enqueueUsers(Value *Ptr);
@@ -107,24 +105,7 @@ bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
PT->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
enqueueUsers(Ptr);
- if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
- return Changed;
-
- IRBuilder<> B(LI ? &*std::next(cast<Instruction>(Ptr)->getIterator())
- : ArgCastInsertPt);
-
- // Cast pointer to global address space and back to flat and let
- // Infer Address Spaces pass to do all necessary rewriting.
- PointerType *NewPT =
- PointerType::get(PT->getContext(), AMDGPUAS::GLOBAL_ADDRESS);
- Value *Cast =
- B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global"));
- Value *CastBack =
- B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat"));
- Ptr->replaceUsesWithIf(CastBack,
- [Cast](Use &U) { return U.getUser() != Cast; });
-
- return true;
+ return Changed;
}
bool AMDGPUPromoteKernelArguments::promoteLoad(LoadInst *LI) {
@@ -135,21 +116,6 @@ bool AMDGPUPromoteKernelArguments::promoteLoad(LoadInst *LI) {
return true;
}
-// skip allocas
-static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
- BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
- for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
- AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);
-
- // If this is a dynamic alloca, the value may depend on the loaded kernargs,
- // so loads will need to be inserted before it.
- if (!AI || !AI->isStaticAlloca())
- break;
- }
-
- return InsPt;
-}
-
bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA,
AliasAnalysis &AA) {
if (skipFunction(F))
@@ -159,7 +125,6 @@ bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA,
if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
return false;
- ArgCastInsertPt = &*getInsertPt(*F.begin());
this->MSSA = &MSSA;
this->AA = &AA;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index e5a35abe6da6b..fbda0196b4617 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1042,13 +1042,38 @@ unsigned AMDGPUTargetMachine::getAssumedAddrSpace(const Value *V) const {
assert(V->getType()->getPointerAddressSpace() == AMDGPUAS::FLAT_ADDRESS);
const auto *Ptr = LD->getPointerOperand();
- if (Ptr->getType()->getPointerAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
- return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
+
// For a generic pointer loaded from the constant memory, it could be assumed
// as a global pointer since the constant memory is only populated on the
// host side. As implied by the offload programming model, only global
// pointers could be referenced on the host side.
- return AMDGPUAS::GLOBAL_ADDRESS;
+ if (Ptr->getType()->getPointerAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
+ return AMDGPUAS::GLOBAL_ADDRESS;
+
+ // A generic pointer loaded through a readonly, noalias kernel argument can
+ // be assumed global for the same reason.
+ if (const Argument *Arg = dyn_cast<Argument>(getUnderlyingObject(Ptr)))
+ if (AMDGPU::isModuleEntryFunctionCC(Arg->getParent()->getCallingConv()) &&
+ Arg->onlyReadsMemory() && Arg->hasNoAliasAttr())
+ return AMDGPUAS::GLOBAL_ADDRESS;
+
+ return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
+}
+
+unsigned
+AMDGPUTargetMachine::getAssumedLiveOnEntryDefAddrSpace(const Value *V) const {
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ if (AMDGPU::isModuleEntryFunctionCC(
+ I->getParent()->getParent()->getCallingConv()))
+ return AMDGPUAS::GLOBAL_ADDRESS;
+ }
+ if (const LoadInst *LD = dyn_cast<LoadInst>(V)) {
+ // Same reasoning as in getAssumedAddrSpace above.
+ if (LD->getPointerOperandType()->getPointerAddressSpace() ==
+ AMDGPUAS::CONSTANT_ADDRESS)
+ return AMDGPUAS::GLOBAL_ADDRESS;
+ }
+ return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
}
std::pair<const Value *, unsigned>
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 06a3047196b8a..ea21c095faf75 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -62,6 +62,8 @@ class AMDGPUTargetMachine : public CodeGenTargetMachineImpl {
unsigned getAssumedAddrSpace(const Value *V) const override;
+ unsigned getAssumedLiveOnEntryDefAddrSpace(const Value *V) const override;
+
std::pair<const Value *, unsigned>
getPredicatedAddrSpace(const Value *V) const override;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index dfa21515838ff..a151b0c3989fa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -1223,6 +1223,28 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
}
}
+bool GCNTTIImpl::isArtificialClobber(Intrinsic::ID IID) const {
+ switch (IID) {
+ case Intrinsic::amdgcn_s_barrier:
+ case Intrinsic::amdgcn_s_cluster_barrier:
+ case Intrinsic::amdgcn_s_barrier_signal:
+ case Intrinsic::amdgcn_s_barrier_signal_var:
+ case Intrinsic::amdgcn_s_barrier_signal_isfirst:
+ case Intrinsic::amdgcn_s_barrier_init:
+ case Intrinsic::amdgcn_s_barrier_join:
+ case Intrinsic::amdgcn_s_barrier_wait:
+ case Intrinsic::amdgcn_s_barrier_leave:
+ case Intrinsic::amdgcn_s_get_barrier_state:
+ case Intrinsic::amdgcn_wave_barrier:
+ case Intrinsic::amdgcn_sched_barrier:
+ case Intrinsic::amdgcn_sched_group_barrier:
+ case Intrinsic::amdgcn_iglp_opt:
+ return true;
+ default:
+ return false;
+ }
+}
+
InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *DstTy, VectorType *SrcTy,
ArrayRef<int> Mask,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 20da8344c9d37..12be42c16d025 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -210,6 +210,8 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
Value *NewV) const override;
+ bool isArtificialClobber(Intrinsic::ID IID) const override;
+
bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0,
const Value *Op1, InstCombiner &IC) const;
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index 5d5553c573b0f..c61aae8335aa4 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -592,6 +592,32 @@ Value *NVPTXTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
return nullptr;
}
+bool NVPTXTTIImpl::isArtificialClobber(Intrinsic::ID IID) const {
+ switch (IID) {
+ case Intrinsic::nvvm_bar_warp_sync:
+ case Intrinsic::nvvm_barrier_cluster_arrive:
+ case Intrinsic::nvvm_barrier_cluster_arrive_aligned:
+ case Intrinsic::nvvm_barrier_cluster_arrive_relaxed:
+ case Intrinsic::nvvm_barrier_cluster_arrive_relaxed_aligned:
+ case Intrinsic::nvvm_barrier_cluster_wait:
+ case Intrinsic::nvvm_barrier_cluster_wait_aligned:
+ case Intrinsic::nvvm_barrier_cta_arrive_aligned_count:
+ case Intrinsic::nvvm_barrier_cta_arrive_count:
+ case Intrinsic::nvvm_barrier_cta_sync_aligned_all:
+ case Intrinsic::nvvm_barrier_cta_sync_aligned_count:
+ case Intrinsic::nvvm_barrier_cta_sync_all:
+ case Intrinsic::nvvm_barrier_cta_sync_count:
+ case Intrinsic::nvvm_barrier0_and:
+ case Intrinsic::nvvm_barrier0_or:
+ case Intrinsic::nvvm_barrier0_popc:
+ case Intrinsic::nvvm_membar_cta:
+ case Intrinsic::nvvm_membar_gl:
+ case Intrinsic::nvvm_membar_sys:
+ return true;
+ default:
+ return false;
+ }
+}
bool NVPTXTTIImpl::isLegalMaskedStore(Type *DataTy, Align Alignment,
unsigned AddrSpace,
TTI::MaskKind MaskKind) const {
@@ -657,6 +683,28 @@ unsigned NVPTXTTIImpl::getAssumedAddrSpace(const Value *V) const {
}
}
+ if (const auto *LD = dyn_cast<LoadInst>(V)) {
+ // The loaded value must be a generic pointer.
+ assert(V->getType()->getPointerAddressSpace() == ADDRESS_SPACE_GENERIC);
+
+ // A generic pointer loaded through a readonly, noalias kernel argument can
+ // be assumed to be a global pointer, since such read-only memory is only
+ // populated on the host side.
+ if (const Argument *Arg =
+ dyn_cast<Argument>(getUnderlyingObject(LD->getPointerOperand())))
+ if (isKernelFunction(*Arg->getParent()) && Arg->onlyReadsMemory() &&
+ Arg->hasNoAliasAttr())
+ return ADDRESS_SPACE_GLOBAL;
+ }
+ return -1;
+}
+
+unsigned NVPTXTTIImpl::getAssumedLiveOnEntryDefAddrSpace(const Value *V) const {
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ if (isKernelFunction(*I->getParent()->getParent())) {
+ return ADDRESS_SPACE_GLOBAL;
+ }
+ }
return -1;
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index d7f4e1da4073b..e1cab29df4c1d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -191,8 +191,13 @@ class NVPTXTTIImpl final : public BasicTTIImplBase<NVPTXTTIImpl> {
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
Value *NewV) const override;
+
+ bool isArtificialClobber(Intrinsic::ID IID) const override;
+
unsigned getAssumedAddrSpace(const Value *V) const override;
+ unsigned getAssumedLiveOnEntryDefAddrSpace(const Value *V) const override;
+
void collectKernelLaunchBounds(
const Function &F,
SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const override;
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 352a1b331001a..594ee6d1792e2 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -94,7 +94,9 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
@@ -176,6 +178,8 @@ class InferAddressSpaces : public FunctionPass {
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<MemorySSAWrapperPass>();
}
bool runOnFunction(Function &F) override;
@@ -186,8 +190,9 @@ class InferAddressSpacesImpl {
Function *F = nullptr;
const DominatorTree *DT = nullptr;
const TargetTransformInfo *TTI = nullptr;
+ MemorySSA *MSSA = nullptr;
+ mutable BatchAAResults BatchAA;
const DataLayout *DL = nullptr;
-
/// Target specific address space which uses of should be replaced if
/// possible.
unsigned FlatAddrSpace = 0;
@@ -245,11 +250,19 @@ class InferAddressSpacesImpl {
unsigned getPredicatedAddrSpace(const Value &PtrV,
const Value *UserCtx) const;
+ unsigned
+ getLoadPtrAddrSpaceImpl(const LoadInst *LI, unsigned NewAS, MemoryAccess *MA,
+ ValueToAddrSpaceMapTy &InferredAddrSpace,
+ SmallPtrSet<MemoryAccess *, 8> Visited) const;
+ unsigned getLoadPtrAddrSpace(const LoadInst *LI,
+ ValueToAddrSpaceMapTy &InferredAddrSpace) const;
public:
InferAddressSpacesImpl(AssumptionCache &AC, const DominatorTree *DT,
- const TargetTransformInfo *TTI, unsigned FlatAddrSpace)
- : AC(AC), DT(DT), TTI(TTI), FlatAddrSpace(FlatAddrSpace) {}
+ const TargetTransformInfo *TTI, MemorySSA *MSSA,
+ AliasAnalysis *AA, unsigned FlatAddrSpace)
+ : AC(AC), DT(DT), TTI(TTI), MSSA(MSSA), BatchAA(*AA),
+ FlatAddrSpace(FlatAddrSpace) {}
bool run(Function &F);
};
@@ -261,6 +274,8 @@ INITIALIZE_PASS_BEGIN(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
false, false)
@@ -327,6 +342,9 @@ static bool isAddressExpression(const Value &V, const DataLayout &DL,
case Instruction::AddrSpaceCast:
case Instruction::GetElementPtr:
return true;
+ case Instruction::Load:
+ return TTI->getAssumedLiveOnEntryDefAddrSpace(&V) !=
+ UninitializedAddressSpace;
case Instruction::Select:
return Op->getType()->isPtrOrPtrVectorTy();
case Instruction::Call: {
@@ -360,6 +378,8 @@ getPointerOperands(const Value &V, const DataLayout &DL,
case Instruction::AddrSpaceCast:
case Instruction::GetElementPtr:
return {Op.getOperand(0)};
+ case Instruction::Load:
+ return {};
case Instruction::Select:
return {Op.getOperand(1), Op.getOperand(2)};
case Instruction::Call: {
@@ -561,9 +581,11 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
PushPtrOperand(GEP->getPointerOperand());
} else if (auto *LI = dyn_cast<LoadInst>(&I))
PushPtrOperand(LI->getPointerOperand());
- else if (auto *SI = dyn_cast<StoreInst>(&I))
+ else if (auto *SI = dyn_cast<StoreInst>(&I)) {
PushPtrOperand(SI->getPointerOperand());
- else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
+ if (SI->getValueOperand()->getType()->isPtrOrPtrVectorTy())
+ PushPtrOperand(SI->getValueOperand());
+ } else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
PushPtrOperand(RMW->getPointerOperand());
else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
PushPtrOperand(CmpX->getPointerOperand());
@@ -900,6 +922,14 @@ Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
return NewI;
}
+ if (auto *LD = dyn_cast<LoadInst>(V)) {
+ Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(LD->getType(), NewAddrSpace);
+ auto *NewI = new AddrSpaceCastInst(V, NewPtrTy);
+ NewI->insertAfter(LD->getIterator());
+ NewI->setDebugLoc(LD->getDebugLoc());
+ return NewI;
+ }
+
if (Instruction *I = dyn_cast<Instruction>(V)) {
Value *NewV = cloneIns...
[truncated]
``````````
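As a hedged illustration of the store-collection and clobber-walk half of the change (hypothetical AMDGPU IR; the real tests live in the truncated portion of the patch), the clobbering access of the load below is the store of an LDS-derived flat pointer, and the barrier in between is stepped over because isArtificialClobber() reports it as an ordering-only clobber:

```llvm
@lds = internal addrspace(3) global float undef, align 4

define amdgpu_kernel void @store_clobber_example(ptr %slot) {
entry:
  %flat = addrspacecast ptr addrspace(3) @lds to ptr
  store ptr %flat, ptr %slot, align 8
  ; MemorySSA treats the barrier as a clobber, but isArtificialClobber()
  ; lets the walk continue up to the store above.
  call void @llvm.amdgcn.s.barrier()
  ; The clobbering store wrote a pointer derived from addrspace(3), so %p
  ; can be inferred to be a local (LDS) pointer.
  %p = load ptr, ptr %slot, align 8
  store float 1.0, ptr %p, align 4
  ret void
}

declare void @llvm.amdgcn.s.barrier()
```

Whether this exact pattern is rewritten depends on the visited-set and clobber checks in getLoadPtrAddrSpace(), which fall in the truncated part of the diff above.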
https://github.com/llvm/llvm-project/pull/171019
More information about the llvm-commits mailing list