[llvm] [InferAddressSpaces] Support address space inference from load values (PR #171019)

via llvm-commits llvm-commits at lists.llvm.org
Sun Dec 7 00:07:49 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: actink (actinks)

<details>
<summary>Changes</summary>

- Extend the generic InferAddressSpaces pass to infer a pointer's address space from the value produced by a load instruction.
- Assume that pointers whose defining memory access is MemorySSA's LiveOnEntryDef reside in the global address space.
- Collect store instructions whose stored value is a flat-address-space pointer, to improve inference of shared/local memory address spaces.
- Use MemorySSA to analyze clobber relationships and propagate address space information from clobbering stores or from the LiveOnEntryDef assumption (see the IR sketch below).
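
As a rough illustration of the LiveOnEntryDef case (a hypothetical example written for this summary, not one of the patch's tests): the flat pointer loaded below has no clobbering store inside the kernel, so MemorySSA reports the live-on-entry def as its defining access; a target that assumes the global address space for live-on-entry defs in kernels (as the AMDGPU and NVPTX overrides in this patch do) then lets InferAddressSpaces rewrite the flat access.

```llvm
; Hypothetical kernel: %pp holds a pointer to a flat pointer. No store in this
; function clobbers the load of %p, so its defining access is MemorySSA's
; live-on-entry def and %p may be assumed to point to global memory.
define amdgpu_kernel void @kernel(ptr addrspace(1) %pp) {
entry:
  %p = load ptr, ptr addrspace(1) %pp, align 8
  ; Under that assumption, this flat store can be rewritten to an
  ; addrspace(1) store by InferAddressSpaces.
  store float 0.000000e+00, ptr %p, align 4
  ret void
}
```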

This enhancement is target-independent and now serves as the unified address space inference mechanism for backends such as AMDGPU and NVPTX.
The AMDGPUPromoteKernelArguments pass no longer performs address space inference; it retains only the functionality of attaching "amdgpu.noclobber" metadata to load instructions (illustrated below).
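
For reference, the retained behavior amounts to tagging such loads roughly as follows (a hypothetical snippet in the style of the existing promote-kernel-arguments tests, not copied from the patch):

```llvm
; Hypothetical output: the pass proves the load through %Arg is not clobbered
; between kernel entry and the load, and attaches the hint as metadata.
define amdgpu_kernel void @kern(ptr addrspace(1) %Arg) {
entry:
  %p = load ptr, ptr addrspace(1) %Arg, align 8, !amdgpu.noclobber !0
  store float 0.000000e+00, ptr %p, align 4
  ret void
}

!0 = !{}
```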

---

Patch is 41.80 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171019.diff


16 Files Affected:

- (modified) llvm/include/llvm/Analysis/TargetTransformInfo.h (+6) 
- (modified) llvm/include/llvm/Analysis/TargetTransformInfoImpl.h (+6) 
- (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+6) 
- (modified) llvm/include/llvm/Target/TargetMachine.h (+8) 
- (modified) llvm/lib/Analysis/TargetTransformInfo.cpp (+9) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp (+1-36) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+28-3) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h (+2) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (+22) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h (+2) 
- (modified) llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp (+48) 
- (modified) llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h (+5) 
- (modified) llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp (+153-7) 
- (modified) llvm/test/CodeGen/AMDGPU/promote-kernel-arguments.ll (+1-1) 
- (modified) llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll (+3-1) 
- (added) llvm/test/Transforms/InferAddressSpaces/NVPTX/load-ptr.ll (+247) 


``````````diff
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 99525607f744a..efb352018fbe4 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -563,6 +563,8 @@ class TargetTransformInfo {
 
   LLVM_ABI unsigned getAssumedAddrSpace(const Value *V) const;
 
+  LLVM_ABI unsigned getAssumedLiveOnEntryDefAddrSpace(const Value *V) const;
+
   LLVM_ABI bool isSingleThreaded() const;
 
   LLVM_ABI std::pair<const Value *, unsigned>
@@ -577,6 +579,10 @@ class TargetTransformInfo {
                                                    Value *OldV,
                                                    Value *NewV) const;
 
+  /// Return true if \p IID only performs an artificial clobber to facilitate
+  /// ordering constraints.
+  LLVM_ABI bool isArtificialClobber(Intrinsic::ID IID) const;
+
   /// Test whether calls to a function lower to actual program function
   /// calls.
   ///
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 835eb7701ccfa..0130b5225ce3f 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -158,6 +158,10 @@ class TargetTransformInfoImplBase {
 
   virtual unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
 
+  virtual unsigned getAssumedLiveOnEntryDefAddrSpace(const Value *V) const {
+    return -1;
+  }
+
   virtual bool isSingleThreaded() const { return false; }
 
   virtual std::pair<const Value *, unsigned>
@@ -171,6 +175,8 @@ class TargetTransformInfoImplBase {
     return nullptr;
   }
 
+  virtual bool isArtificialClobber(Intrinsic::ID IID) const { return false; }
+
   virtual bool isLoweredToCall(const Function *F) const {
     assert(F && "A concrete function must be provided to this routine.");
 
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 494199835a19c..10708245b1180 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -440,6 +440,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     return getTLI()->getTargetMachine().getAssumedAddrSpace(V);
   }
 
+  unsigned getAssumedLiveOnEntryDefAddrSpace(const Value *V) const override {
+    return getTLI()->getTargetMachine().getAssumedLiveOnEntryDefAddrSpace(V);
+  }
+
   bool isSingleThreaded() const override {
     return getTLI()->getTargetMachine().Options.ThreadModel ==
            ThreadModel::Single;
@@ -455,6 +459,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     return nullptr;
   }
 
+  bool isArtificialClobber(Intrinsic::ID IID) const override { return false; }
+
   bool isLegalAddImmediate(int64_t imm) const override {
     return getTLI()->isLegalAddImmediate(imm);
   }
diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
index d0fd483a8ddaa..03e0b43686cd4 100644
--- a/llvm/include/llvm/Target/TargetMachine.h
+++ b/llvm/include/llvm/Target/TargetMachine.h
@@ -378,6 +378,14 @@ class LLVM_ABI TargetMachine {
   /// properties.
   virtual unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
 
+  /// LiveOnEntryDef is the same concept as in MemorySSA: loads and stores
+  /// through pointer arguments and other global values may be defined by
+  /// memory operations that do not occur in the current function. Return the
+  /// assumed address space for such memory operations.
+  virtual unsigned getAssumedLiveOnEntryDefAddrSpace(const Value *V) const {
+    return -1;
+  }
+
   /// If the specified predicate checks whether a generic pointer falls within
   /// a specified address space, return that generic pointer and the address
   /// space being queried.
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index c529d87502acd..d943c2171d6a8 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -339,6 +339,11 @@ unsigned TargetTransformInfo::getAssumedAddrSpace(const Value *V) const {
   return TTIImpl->getAssumedAddrSpace(V);
 }
 
+unsigned
+TargetTransformInfo::getAssumedLiveOnEntryDefAddrSpace(const Value *V) const {
+  return TTIImpl->getAssumedLiveOnEntryDefAddrSpace(V);
+}
+
 bool TargetTransformInfo::isSingleThreaded() const {
   return TTIImpl->isSingleThreaded();
 }
@@ -353,6 +358,10 @@ Value *TargetTransformInfo::rewriteIntrinsicWithAddressSpace(
   return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
 }
 
+bool TargetTransformInfo::isArtificialClobber(Intrinsic::ID IID) const {
+  return TTIImpl->isArtificialClobber(IID);
+}
+
 bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
   return TTIImpl->isLoweredToCall(F);
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp
index 06819d05b4be6..8ec9a1b15e6a5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp
@@ -34,8 +34,6 @@ class AMDGPUPromoteKernelArguments : public FunctionPass {
 
   AliasAnalysis *AA;
 
-  Instruction *ArgCastInsertPt;
-
   SmallVector<Value *> Ptrs;
 
   void enqueueUsers(Value *Ptr);
@@ -107,24 +105,7 @@ bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
       PT->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
     enqueueUsers(Ptr);
 
-  if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
-    return Changed;
-
-  IRBuilder<> B(LI ? &*std::next(cast<Instruction>(Ptr)->getIterator())
-                   : ArgCastInsertPt);
-
-  // Cast pointer to global address space and back to flat and let
-  // Infer Address Spaces pass to do all necessary rewriting.
-  PointerType *NewPT =
-      PointerType::get(PT->getContext(), AMDGPUAS::GLOBAL_ADDRESS);
-  Value *Cast =
-      B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global"));
-  Value *CastBack =
-      B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat"));
-  Ptr->replaceUsesWithIf(CastBack,
-                         [Cast](Use &U) { return U.getUser() != Cast; });
-
-  return true;
+  return Changed;
 }
 
 bool AMDGPUPromoteKernelArguments::promoteLoad(LoadInst *LI) {
@@ -135,21 +116,6 @@ bool AMDGPUPromoteKernelArguments::promoteLoad(LoadInst *LI) {
   return true;
 }
 
-// skip allocas
-static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
-  BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
-  for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
-    AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);
-
-    // If this is a dynamic alloca, the value may depend on the loaded kernargs,
-    // so loads will need to be inserted before it.
-    if (!AI || !AI->isStaticAlloca())
-      break;
-  }
-
-  return InsPt;
-}
-
 bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA,
                                        AliasAnalysis &AA) {
   if (skipFunction(F))
@@ -159,7 +125,6 @@ bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA,
   if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
     return false;
 
-  ArgCastInsertPt = &*getInsertPt(*F.begin());
   this->MSSA = &MSSA;
   this->AA = &AA;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index e5a35abe6da6b..fbda0196b4617 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1042,13 +1042,38 @@ unsigned AMDGPUTargetMachine::getAssumedAddrSpace(const Value *V) const {
   assert(V->getType()->getPointerAddressSpace() == AMDGPUAS::FLAT_ADDRESS);
 
   const auto *Ptr = LD->getPointerOperand();
-  if (Ptr->getType()->getPointerAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
-    return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
+
   // For a generic pointer loaded from the constant memory, it could be assumed
   // as a global pointer since the constant memory is only populated on the
   // host side. As implied by the offload programming model, only global
   // pointers could be referenced on the host side.
-  return AMDGPUAS::GLOBAL_ADDRESS;
+  if (Ptr->getType()->getPointerAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
+    return AMDGPUAS::GLOBAL_ADDRESS;
+
+  // For a generic pointer loaded from a readonly, noalias argument, the same
+  // reasoning as above applies.
+  if (const Argument *Arg = dyn_cast<Argument>(getUnderlyingObject(Ptr)))
+    if (AMDGPU::isModuleEntryFunctionCC(Arg->getParent()->getCallingConv()) &&
+        Arg->onlyReadsMemory() && Arg->hasNoAliasAttr())
+      return AMDGPUAS::GLOBAL_ADDRESS;
+
+  return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
+}
+
+unsigned
+AMDGPUTargetMachine::getAssumedLiveOnEntryDefAddrSpace(const Value *V) const {
+  if (const Instruction *I = dyn_cast<Instruction>(V)) {
+    if (AMDGPU::isModuleEntryFunctionCC(
+            I->getParent()->getParent()->getCallingConv()))
+      return AMDGPUAS::GLOBAL_ADDRESS;
+  }
+  if (const LoadInst *LD = dyn_cast<LoadInst>(V)) {
+    // Same reasoning as in getAssumedAddrSpace above.
+    if (LD->getPointerOperandType()->getPointerAddressSpace() ==
+        AMDGPUAS::CONSTANT_ADDRESS)
+      return AMDGPUAS::GLOBAL_ADDRESS;
+  }
+  return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
 }
 
 std::pair<const Value *, unsigned>
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 06a3047196b8a..ea21c095faf75 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -62,6 +62,8 @@ class AMDGPUTargetMachine : public CodeGenTargetMachineImpl {
 
   unsigned getAssumedAddrSpace(const Value *V) const override;
 
+  unsigned getAssumedLiveOnEntryDefAddrSpace(const Value *V) const override;
+
   std::pair<const Value *, unsigned>
   getPredicatedAddrSpace(const Value *V) const override;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index dfa21515838ff..a151b0c3989fa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -1223,6 +1223,28 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
   }
 }
 
+bool GCNTTIImpl::isArtificialClobber(Intrinsic::ID IID) const {
+  switch (IID) {
+  case Intrinsic::amdgcn_s_barrier:
+  case Intrinsic::amdgcn_s_cluster_barrier:
+  case Intrinsic::amdgcn_s_barrier_signal:
+  case Intrinsic::amdgcn_s_barrier_signal_var:
+  case Intrinsic::amdgcn_s_barrier_signal_isfirst:
+  case Intrinsic::amdgcn_s_barrier_init:
+  case Intrinsic::amdgcn_s_barrier_join:
+  case Intrinsic::amdgcn_s_barrier_wait:
+  case Intrinsic::amdgcn_s_barrier_leave:
+  case Intrinsic::amdgcn_s_get_barrier_state:
+  case Intrinsic::amdgcn_wave_barrier:
+  case Intrinsic::amdgcn_sched_barrier:
+  case Intrinsic::amdgcn_sched_group_barrier:
+  case Intrinsic::amdgcn_iglp_opt:
+    return true;
+  default:
+    return false;
+  }
+}
+
 InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
                                            VectorType *DstTy, VectorType *SrcTy,
                                            ArrayRef<int> Mask,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 20da8344c9d37..12be42c16d025 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -210,6 +210,8 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
   Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                           Value *NewV) const override;
 
+  bool isArtificialClobber(Intrinsic::ID IID) const override;
+
   bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0,
                                  const Value *Op1, InstCombiner &IC) const;
 
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index 5d5553c573b0f..c61aae8335aa4 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -592,6 +592,32 @@ Value *NVPTXTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
   return nullptr;
 }
 
+bool NVPTXTTIImpl::isArtificialClobber(Intrinsic::ID IID) const {
+  switch (IID) {
+  case Intrinsic::nvvm_bar_warp_sync:
+  case Intrinsic::nvvm_barrier_cluster_arrive:
+  case Intrinsic::nvvm_barrier_cluster_arrive_aligned:
+  case Intrinsic::nvvm_barrier_cluster_arrive_relaxed:
+  case Intrinsic::nvvm_barrier_cluster_arrive_relaxed_aligned:
+  case Intrinsic::nvvm_barrier_cluster_wait:
+  case Intrinsic::nvvm_barrier_cluster_wait_aligned:
+  case Intrinsic::nvvm_barrier_cta_arrive_aligned_count:
+  case Intrinsic::nvvm_barrier_cta_arrive_count:
+  case Intrinsic::nvvm_barrier_cta_sync_aligned_all:
+  case Intrinsic::nvvm_barrier_cta_sync_aligned_count:
+  case Intrinsic::nvvm_barrier_cta_sync_all:
+  case Intrinsic::nvvm_barrier_cta_sync_count:
+  case Intrinsic::nvvm_barrier0_and:
+  case Intrinsic::nvvm_barrier0_or:
+  case Intrinsic::nvvm_barrier0_popc:
+  case Intrinsic::nvvm_membar_cta:
+  case Intrinsic::nvvm_membar_gl:
+  case Intrinsic::nvvm_membar_sys:
+    return true;
+  default:
+    return false;
+  }
+}
 bool NVPTXTTIImpl::isLegalMaskedStore(Type *DataTy, Align Alignment,
                                       unsigned AddrSpace,
                                       TTI::MaskKind MaskKind) const {
@@ -657,6 +683,28 @@ unsigned NVPTXTTIImpl::getAssumedAddrSpace(const Value *V) const {
     }
   }
 
+  if (const auto *LD = dyn_cast<LoadInst>(V)) {
+    // The loaded value must be a generic pointer.
+    assert(V->getType()->getPointerAddressSpace() == ADDRESS_SPACE_GENERIC);
+
+    // For a generic pointer loaded from a readonly, noalias argument, it can
+    // be assumed to be a global pointer since that memory is only populated
+    // on the host side.
+    if (const Argument *Arg =
+            dyn_cast<Argument>(getUnderlyingObject(LD->getPointerOperand())))
+      if (isKernelFunction(*Arg->getParent()) && Arg->onlyReadsMemory() &&
+          Arg->hasNoAliasAttr())
+        return ADDRESS_SPACE_GLOBAL;
+  }
+  return -1;
+}
+
+unsigned NVPTXTTIImpl::getAssumedLiveOnEntryDefAddrSpace(const Value *V) const {
+  if (const Instruction *I = dyn_cast<Instruction>(V)) {
+    if (isKernelFunction(*I->getParent()->getParent())) {
+      return ADDRESS_SPACE_GLOBAL;
+    }
+  }
   return -1;
 }
 
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index d7f4e1da4073b..e1cab29df4c1d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -191,8 +191,13 @@ class NVPTXTTIImpl final : public BasicTTIImplBase<NVPTXTTIImpl> {
 
   Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                           Value *NewV) const override;
+
+  bool isArtificialClobber(Intrinsic::ID IID) const override;
+
   unsigned getAssumedAddrSpace(const Value *V) const override;
 
+  unsigned getAssumedLiveOnEntryDefAddrSpace(const Value *V) const override;
+
   void collectKernelLaunchBounds(
       const Function &F,
       SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const override;
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 352a1b331001a..594ee6d1792e2 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -94,7 +94,9 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/MemorySSA.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/BasicBlock.h"
@@ -176,6 +178,8 @@ class InferAddressSpaces : public FunctionPass {
     AU.addPreserved<DominatorTreeWrapperPass>();
     AU.addRequired<AssumptionCacheTracker>();
     AU.addRequired<TargetTransformInfoWrapperPass>();
+    AU.addRequired<AAResultsWrapperPass>();
+    AU.addRequired<MemorySSAWrapperPass>();
   }
 
   bool runOnFunction(Function &F) override;
@@ -186,8 +190,9 @@ class InferAddressSpacesImpl {
   Function *F = nullptr;
   const DominatorTree *DT = nullptr;
   const TargetTransformInfo *TTI = nullptr;
+  MemorySSA *MSSA = nullptr;
+  mutable BatchAAResults BatchAA;
   const DataLayout *DL = nullptr;
-
   /// Target specific address space which uses of should be replaced if
   /// possible.
   unsigned FlatAddrSpace = 0;
@@ -245,11 +250,19 @@ class InferAddressSpacesImpl {
 
   unsigned getPredicatedAddrSpace(const Value &PtrV,
                                   const Value *UserCtx) const;
+  unsigned
+  getLoadPtrAddrSpaceImpl(const LoadInst *LI, unsigned NewAS, MemoryAccess *MA,
+                          ValueToAddrSpaceMapTy &InferredAddrSpace,
+                          SmallPtrSet<MemoryAccess *, 8> Visited) const;
+  unsigned getLoadPtrAddrSpace(const LoadInst *LI,
+                               ValueToAddrSpaceMapTy &InferredAddrSpace) const;
 
 public:
   InferAddressSpacesImpl(AssumptionCache &AC, const DominatorTree *DT,
-                         const TargetTransformInfo *TTI, unsigned FlatAddrSpace)
-      : AC(AC), DT(DT), TTI(TTI), FlatAddrSpace(FlatAddrSpace) {}
+                         const TargetTransformInfo *TTI, MemorySSA *MSSA,
+                         AliasAnalysis *AA, unsigned FlatAddrSpace)
+      : AC(AC), DT(DT), TTI(TTI), MSSA(MSSA), BatchAA(*AA),
+        FlatAddrSpace(FlatAddrSpace) {}
   bool run(Function &F);
 };
 
@@ -261,6 +274,8 @@ INITIALIZE_PASS_BEGIN(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
                       false, false)
 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
 INITIALIZE_PASS_END(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
                     false, false)
 
@@ -327,6 +342,9 @@ static bool isAddressExpression(const Value &V, const DataLayout &DL,
   case Instruction::AddrSpaceCast:
   case Instruction::GetElementPtr:
     return true;
+  case Instruction::Load:
+    return TTI->getAssumedLiveOnEntryDefAddrSpace(&V) !=
+           UninitializedAddressSpace;
   case Instruction::Select:
     return Op->getType()->isPtrOrPtrVectorTy();
   case Instruction::Call: {
@@ -360,6 +378,8 @@ getPointerOperands(const Value &V, const DataLayout &DL,
   case Instruction::AddrSpaceCast:
   case Instruction::GetElementPtr:
     return {Op.getOperand(0)};
+  case Instruction::Load:
+    return {};
   case Instruction::Select:
     return {Op.getOperand(1), Op.getOperand(2)};
   case Instruction::Call: {
@@ -561,9 +581,11 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
       PushPtrOperand(GEP->getPointerOperand());
     } else if (auto *LI = dyn_cast<LoadInst>(&I))
       PushPtrOperand(LI->getPointerOperand());
-    else if (auto *SI = dyn_cast<StoreInst>(&I))
+    else if (auto *SI = dyn_cast<StoreInst>(&I)) {
       PushPtrOperand(SI->getPointerOperand());
-    else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
+      if (SI->getValueOperand()->getType()->isPtrOrPtrVectorTy())
+        PushPtrOperand(SI->getValueOperand());
+    } else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
       PushPtrOperand(RMW->getPointerOperand());
     else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
       PushPtrOperand(CmpX->getPointerOperand());
@@ -900,6 +922,14 @@ Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
     return NewI;
   }
 
+  if (auto *LD = dyn_cast<LoadInst>(V)) {
+    Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(LD->getType(), NewAddrSpace);
+    auto *NewI = new AddrSpaceCastInst(V, NewPtrTy);
+    NewI->insertAfter(LD->getIterator());
+    NewI->setDebugLoc(LD->getDebugLoc());
+    return NewI;
+  }
+
   if (Instruction *I = dyn_cast<Instruction>(V)) {
     Value *NewV = cloneIns...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/171019

