[llvm] [GVN] MemorySSA for GVN: eliminate redundant loads via MemorySSA (PR #152859)
Momchil Velikov via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 11 02:30:41 PST 2025
================
@@ -2146,10 +2191,530 @@ static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
I->replaceAllUsesWith(Repl);
}
+/// Look for an earlier access that can supply the value of an
+/// !invariant.group load.
+///
+/// Starting from the load's pointer operand (with pointer casts stripped), walk
+/// the users of that pointer and of every bitcast / all-zero-index GEP derived
+/// from it, looking for non-volatile instructions tagged with !invariant.group
+/// whose load/store pointer operand is the pointer being walked. A match
+/// replaces the current best candidate only if it dominates it.
+///
+/// \param L  the load to find a replacement value for.
+/// \param DT dominator tree used for the dominance queries.
+/// \returns the most-dominating matching instruction, or nullptr when no
+///          matching instruction dominates \p L.
+static Instruction *findInvariantGroupValue(LoadInst *L, DominatorTree &DT) {
+  Value *Root = L->getPointerOperand()->stripPointerCasts();
+
+  // It's not safe to walk the use list of a global value because function
+  // passes aren't allowed to look outside their functions.
+  // FIXME: this could be fixed by filtering instructions from outside of
+  // current function.
+  if (isa<Constant>(Root))
+    return nullptr;
+
+  // Work stack of pointers known to be equivalent to the load operand.
+  SmallVector<Value *, 8> Pending;
+  Pending.push_back(Root);
+
+  // Best candidate so far; L itself doubles as the "nothing found" sentinel.
+  Instruction *Best = L;
+
+  // FIXME: This loop is potentially O(n^2) due to repeated dominates checks.
+  // The stack holds one element on entry, so testing emptiness at the bottom
+  // visits exactly the same sequence as testing it at the top.
+  do {
+    Value *Ptr = Pending.pop_back_val();
+    assert(Ptr && !isa<GlobalValue>(Ptr) &&
+           "Null or GlobalValue should not be inserted");
+
+    for (User *U : Ptr->users()) {
+      auto *UserI = dyn_cast<Instruction>(U);
+      if (!UserI || UserI == L)
+        continue;
+      // Only an instruction dominating the current best can improve on it.
+      if (!DT.dominates(UserI, Best))
+        continue;
+
+      // Bitcasts and zero GEPs produce an equivalent pointer: queue them so
+      // their users get inspected as well.
+      // TODO: Should drop bitcast?
+      bool IsEquivalentPtr = isa<BitCastInst>(UserI);
+      if (!IsEquivalentPtr)
+        if (auto *GEP = dyn_cast<GetElementPtrInst>(UserI))
+          IsEquivalentPtr = GEP->hasAllZeroIndices();
+      if (IsEquivalentPtr) {
+        Pending.push_back(UserI);
+        continue;
+      }
+
+      // A non-volatile load/store carrying invariant.group metadata and using
+      // the same pointer operand lets us assume the pointed-to value did not
+      // change in between.
+      const bool SameGroupAccess =
+          UserI->hasMetadata(LLVMContext::MD_invariant_group) &&
+          Ptr == getLoadStorePointerOperand(UserI) && !UserI->isVolatile();
+      if (SameGroupAccess)
+        Best = UserI;
+    }
+  } while (!Pending.empty());
+
+  if (Best == L)
+    return nullptr;
+  return Best;
+}
+
+/// Compute the memory location accessed by \p I when \p I is a kind of
+/// instruction that could provide a value for eliminating a load.
+///
+/// Plain loads and masked loads always qualify. Plain stores and masked stores
+/// qualify only when \p AllowStores is set. Every other instruction yields
+/// std::nullopt.
+///
+/// \param I           instruction to inspect.
+/// \param AllowStores whether (masked) stores are acceptable.
+/// \param TLI         forwarded to MemoryLocation::getForArgument for the
+///                    masked intrinsics.
+static std::optional<MemoryLocation>
+maybeLoadStoreLocation(Instruction *I, bool AllowStores,
+                       const TargetLibraryInfo *TLI) {
+  if (auto *Load = dyn_cast<LoadInst>(I))
+    return MemoryLocation::get(Load);
+
+  if (auto *Call = dyn_cast<IntrinsicInst>(I)) {
+    const Intrinsic::ID ID = Call->getIntrinsicID();
+    // The location is derived from argument 0 for a masked load and from
+    // argument 1 for a masked store.
+    if (ID == Intrinsic::masked_load)
+      return MemoryLocation::getForArgument(Call, 0, TLI);
+    if (ID == Intrinsic::masked_store) {
+      if (!AllowStores)
+        return std::nullopt;
+      return MemoryLocation::getForArgument(Call, 1, TLI);
+    }
+    // Any other intrinsic falls through to the store check below.
+  }
+
+  // Only a plain store is left as a possible match, and only when permitted.
+  auto *Store = AllowStores ? dyn_cast<StoreInst>(I) : nullptr;
+  if (!Store)
+    return std::nullopt;
+  return MemoryLocation::get(Store);
+}
+
+/// Scan the users of the given memory accesses for an instruction in \p BB
+/// that accesses a location known to overlap \p Loc and can therefore provide
+/// the loaded value.
+///
+/// The accesses in \p ClobbersList are processed in order and the scan stops
+/// after the first one that yields a candidate. Loads and masked loads are
+/// candidates; stores and masked stores are considered only when
+/// \p IsInvariantLoad is set. When \p L is non-null, \p L itself and every
+/// access that does not locally dominate it are ignored.
+///
+/// \returns std::nullopt if no candidate was found; an Unknown value for \p BB
+///          if the per-access user scan limit (ScanUsersLimit) was hit;
+///          otherwise a Def (exact alias) or a Clobber with offset (partial
+///          alias) naming the chosen instruction.
+std::optional<GVNPass::ReachingMemVal> GVNPass::scanMemoryAccessesUsers(
+    const MemoryLocation &Loc, bool IsInvariantLoad, BasicBlock *BB,
+    const SmallVectorImpl<MemoryAccess *> &ClobbersList, MemorySSA &MSSA,
+    BatchAAResults &AA, LoadInst *L) {
+  std::optional<ReachingMemVal> Best;
+
+  // Offer Candidate as the result. The first candidate is always taken. A
+  // later one replaces the current choice only when the current choice locally
+  // dominates it, i.e. when the new candidate is closer to the load within the
+  // block.
+  auto Consider = [&](AliasResult &AR, Instruction *Candidate) {
+    const bool IsPartial = AR == AliasResult::PartialAlias;
+
+    if (!Best) {
+      Best = IsPartial
+                 ? ReachingMemVal::getClobber(Loc.Ptr, Candidate, AR.getOffset())
+                 : ReachingMemVal::getDef(Loc.Ptr, Candidate);
+      return;
+    }
+
+    if (!MSSA.locallyDominates(MSSA.getMemoryAccess(Best->Inst),
+                               MSSA.getMemoryAccess(Candidate)))
+      return;
+
+    // Retarget the existing record in place.
+    Best->Kind = IsPartial ? DepKind::Clobber : DepKind::Def;
+    Best->Offset = IsPartial ? AR.getOffset() : -1;
+    Best->Inst = Candidate;
+    Best->Block = Candidate->getParent();
+  };
+
+  for (MemoryAccess *MA : ClobbersList) {
+    unsigned NumScanned = 0;
+    for (User *U : MA->users()) {
+      // Give up with an unknown result once the scan budget for this access
+      // is used up.
+      ++NumScanned;
+      if (NumScanned >= ScanUsersLimit)
+        return ReachingMemVal::getUnknown(BB, Loc.Ptr);
+
+      // Only memory uses/defs located in BB are of interest.
+      auto *Access = dyn_cast<MemoryUseOrDef>(U);
+      if (!Access)
+        continue;
+      if (Access->getBlock() != BB)
+        continue;
+
+      // Skip the load itself and anything that does not come before it.
+      Instruction *MemI = Access->getMemoryInst();
+      if (MemI == L)
+        continue;
+      if (L && !MSSA.locallyDominates(Access, MSSA.getMemoryAccess(L)))
+        continue;
+
+      auto CandidateLoc = maybeLoadStoreLocation(MemI, IsInvariantLoad, TLI);
+      if (!CandidateLoc)
+        continue;
+
+      // The candidate is usable only if the locations certainly overlap:
+      // either a precise overlap, or a partial overlap with a known,
+      // non-negative offset. Anything else (no alias, may alias, partial
+      // overlap without an offset or with a negative one) tells us nothing
+      // about the loaded value.
+      AliasResult AR = AA.alias(*CandidateLoc, Loc);
+      const bool Usable =
+          AR == AliasResult::MustAlias ||
+          (AR == AliasResult::PartialAlias && AR.hasOffset() &&
+           AR.getOffset() >= 0);
+      if (!Usable)
+        continue;
+
+      Consider(AR, MemI);
+    }
+
+    // Later entries of the list are not examined once something was found.
+    if (Best)
+      return Best;
+  }
+
+  return Best;
+}
+
+/// Decide whether the clobbering access \p ClobberMA (asserted to be in \p BB) really affects the load location \p Loc.
+/// Returns std::nullopt if the access can be skipped; otherwise a Def, a Clobber (optionally with an offset) or an Unknown ReachingMemVal.
+std::optional<GVNPass::ReachingMemVal> GVNPass::accessMayModifyLocation(
+ MemoryAccess *ClobberMA, const MemoryLocation &Loc, bool IsInvariantLoad,
+ BasicBlock *BB, MemorySSA &MSSA, BatchAAResults &AA) {
+ assert(ClobberMA->getBlock() == BB); // The clobber must be local to BB.
+
+ // If the clobbering access is the entry memory state, we cannot say anything
+ // about the content of the memory, except when we are accessing a local
+ // object, which can be turned later into producing `undef`.
+ if (MSSA.isLiveOnEntryDef(ClobberMA)) {
+ if (auto *Alloc = dyn_cast<AllocaInst>(getUnderlyingObject(Loc.Ptr)))
+ if (Alloc->getParent() == BB)
+ return ReachingMemVal::getDef(Loc.Ptr, const_cast<AllocaInst *>(Alloc));
+ return ReachingMemVal::getUnknown(BB, Loc.Ptr); // Entry state and no alloca in BB underlies Loc.
+ }
+
+ // Loads from "constant" memory can't be clobbered.
+ if (IsInvariantLoad || AA.pointsToConstantMemory(Loc))
+ return std::nullopt;
+
+ auto GetOrdering = [](const Instruction *I) { // Atomic ordering of a load or a store.
+ if (auto *L = dyn_cast<LoadInst>(I))
+ return L->getOrdering();
+ return cast<StoreInst>(I)->getOrdering(); // cast<> requires any non-load to be a store.
+ };
+ Instruction *ClobberI = cast<MemoryDef>(ClobberMA)->getMemoryInst(); // cast<> requires a MemoryDef here.
+
+ // Check if the clobbering access is a load or a store that we can reuse.
+ if (auto MaybeLoc = maybeLoadStoreLocation(ClobberI, true, TLI)) { // Stores are accepted here (AllowStores is true).
+ AliasResult AR = AA.alias(*MaybeLoc, Loc);
+ if (AR == AliasResult::MustAlias)
+ return ReachingMemVal::getDef(Loc.Ptr, ClobberI); // Exact alias: report ClobberI as a Def.
+
+ if (AR == AliasResult::NoAlias) {
+ // If the locations do not alias we may still be able to skip over the
+ // clobbering instruction, even if it is atomic.
+ // The original load is either non-atomic or unordered. We can reorder
+ // these across non-atomic, unordered or monotonic loads or across any
+ // store.
+ if (!ClobberI->isAtomic() ||
+ !isStrongerThan(GetOrdering(ClobberI), AtomicOrdering::Monotonic) ||
+ isa<StoreInst>(ClobberI))
+ return std::nullopt;
+ return ReachingMemVal::getClobber(Loc.Ptr, ClobberI); // Atomic, stronger than monotonic, and not a store.
+ }
+
+ // Skip over volatile loads (the original load is non-volatile, non-atomic).
+ if (!ClobberI->isAtomic() && isa<LoadInst>(ClobberI))
+ return std::nullopt;
+
+ if (AR == AliasResult::MayAlias ||
+ (AR == AliasResult::PartialAlias &&
+ (!AR.hasOffset() || AR.getOffset() < 0)))
+ return ReachingMemVal::getClobber(Loc.Ptr, ClobberI); // Overlap unknown or unusable: plain Clobber without offset.
+
+ // The only option left is a store of the superset of the required bits. NOTE(review): the check above only rejects offsets < 0, so an offset of exactly 0 would trip the assert below - confirm AA never reports PartialAlias with offset 0.
+ assert(AR == AliasResult::PartialAlias && AR.hasOffset() &&
+ AR.getOffset() > 0 &&
+ "Must be the superset/partial overlap case with positive offset");
+ return ReachingMemVal::getClobber(Loc.Ptr, ClobberI, AR.getOffset());
+ }
+
+ if (auto *II = dyn_cast<IntrinsicInst>(ClobberI)) {
+ if (isa<DbgInfoIntrinsic>(II))
+ return std::nullopt; // Debug intrinsics are skipped.
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
+ MemoryLocation IIObjLoc = MemoryLocation::getForArgument(II, 0, TLI);
+ if (AA.isMustAlias(IIObjLoc, Loc))
+ return ReachingMemVal::getDef(Loc.Ptr, ClobberI); // lifetime.start of exactly this location is reported as a Def.
+ return std::nullopt; // Any other lifetime.start is skipped.
+ }
+ }
+
+ // If we are at a malloc-like function call, we can turn the load into `undef`
+ // or zero.
+ if (isNoAliasCall(ClobberI)) {
+ const Value *Obj = getUnderlyingObject(Loc.Ptr);
+ if (Obj == ClobberI || AA.isMustAlias(ClobberI, Loc.Ptr))
+ return ReachingMemVal::getDef(Loc.Ptr, ClobberI);
+ }
+
+ // Can reorder loads across a release fence.
+ if (auto *FI = dyn_cast<FenceInst>(ClobberI))
+ if (FI->getOrdering() == AtomicOrdering::Release)
+ return std::nullopt;
+
+ // See if the clobber instruction (e.g., a generic call) may modify the
+ // location.
+ ModRefInfo MR = AA.getModRefInfo(ClobberI, Loc);
+ // If the instruction does not access the location at all, or only reads it,
+ // it cannot change the loaded value and is skipped.
+ if (MR == ModRefInfo::NoModRef || MR == ModRefInfo::Ref)
+ return std::nullopt;
+
+ // Conservatively assume the clobbering memory access may overwrite the
+ // location.
+ return ReachingMemVal::getClobber(Loc.Ptr, ClobberI);
+}
+
+/// Collect the reachable predecessors of \p BB as new starting points for the
+/// backwards walk, phi-translating both the memory address and the memory
+/// clobber into each predecessor.
+///
+/// \param BB        block whose predecessors are collected.
+/// \param Addr      address of the memory location as seen in \p BB.
+/// \param ClobberMA clobbering access associated with \p BB; when it is a
+///                  MemoryPhi located in \p BB, each predecessor receives the
+///                  phi's incoming value for that predecessor instead.
+/// \param Blocks    per-block info for the blocks visited so far; extended
+///                  with the newly collected predecessors.
+/// \param Worklist  receives the newly collected predecessors.
+/// \returns false if the block should be marked as clobbering the memory
+///          location in an unknown way, true otherwise. On a false return
+///          neither \p Blocks nor \p Worklist has been modified.
+bool GVNPass::collectPredecessors(BasicBlock *BB, const PHITransAddr &Addr,
+                                  MemoryAccess *ClobberMA,
+                                  DependencyBlockSet &Blocks,
+                                  SmallVectorImpl<BasicBlock *> &Worklist) {
+  // An address that needs translation but cannot possibly be translated makes
+  // the whole block an unknown clobber.
+  if (Addr.needsPHITranslationFromBlock(BB) &&
+      !Addr.isPotentiallyPHITranslatable())
+    return false;
+
+  auto *MPhi =
+      ClobberMA->getBlock() == BB ? dyn_cast<MemoryPhi>(ClobberMA) : nullptr;
+
+  // Stage the results locally so that an early `return false` below leaves
+  // Blocks and Worklist untouched.
+  SmallVector<std::pair<BasicBlock *, DependencyBlockInfo>, 8> Preds;
+  for (BasicBlock *Pred : predecessors(BB)) {
+    // Skip unreachable predecessors.
+    if (!DT->isReachableFromEntry(Pred))
+      continue;
+
+    // Skip a predecessor that was already staged in this call, e.g. when it
+    // reaches BB through more than one edge.
+    if (llvm::any_of(Preds, [Pred](const auto &P) { return P.first == Pred; }))
+      continue;
+
+    PHITransAddr TransAddr = Addr;
+    if (TransAddr.needsPHITranslationFromBlock(BB))
+      TransAddr.translateValue(BB, Pred, DT, false);
+
+    auto It = Blocks.find(Pred);
+    if (It != Blocks.end()) {
+      // If we reach a visited block with a different address, set the
+      // current block as clobbering the memory location in an unknown way
+      // (by returning false).
+      if (It->second.Addr.getAddr() != TransAddr.getAddr())
+        return false;
+      // Otherwise, just stop the traversal.
+      continue;
+    }
+
+    Preds.emplace_back(
+        Pred, DependencyBlockInfo(TransAddr,
+                                  MPhi ? MPhi->getIncomingValueForBlock(Pred)
+                                       : ClobberMA));
+  }
+
+  // Commit: populate the worklist with the predecessors found, and cache the
+  // eventual translated address for each block.
+  for (auto &[PredBB, Info] : Preds) {
+    Blocks.try_emplace(PredBB, std::move(Info));
+    Worklist.push_back(PredBB);
+  }
+
+  return true;
+}
+
+/// Build the list of memory accesses whose users are worth scanning for
+/// accesses that could potentially alias our memory location.
+///
+/// Starting with the [InitialClobberMA, ClobberMA] chain recorded in
+/// \p StartInfo, every access on the defining-access chain is appended to
+/// \p Clobbers. The walk then moves on to another block (the immediate
+/// dominator of the current block when the chain ended in a MemoryPhi of that
+/// block, otherwise the block of the last access) and continues with the chain
+/// recorded for it in \p Blocks. It ends at the live-on-entry definition, at a
+/// non-phi access located in the current block, or at a block that has no
+/// entry in \p Blocks.
+///
+/// \param Clobbers  output list, appended to.
+/// \param BB        block the walk starts in.
+/// \param StartInfo chain endpoints for the starting block.
+/// \param Blocks    chain endpoints for the other visited blocks.
+/// \param MSSA      used to recognise the live-on-entry definition.
+void GVNPass::collectClobberList(SmallVectorImpl<MemoryAccess *> &Clobbers,
+                                 BasicBlock *BB,
+                                 const DependencyBlockInfo &StartInfo,
+                                 const DependencyBlockSet &Blocks,
+                                 MemorySSA &MSSA) {
+  MemoryAccess *Cur = StartInfo.InitialClobberMA;
+  MemoryAccess *Stop = StartInfo.ClobberMA;
+
+  while (true) {
+    // Record the whole chain, including its final element.
+    for (; Cur != Stop; Cur = cast<MemoryUseOrDef>(Cur)->getDefiningAccess())
+      Clobbers.push_back(Cur);
+    Clobbers.push_back(Cur);
+
+    if (MSSA.isLiveOnEntryDef(Cur))
+      return;
+
+    const bool EndsInCurrentBlock = Cur->getBlock() == BB;
+    if (EndsInCurrentBlock && !isa<MemoryPhi>(Cur))
+      return;
+
+    // A chain ending in a MemoryPhi of the current block continues in the
+    // immediate dominator; otherwise it continues where the last access lives.
+    BB = EndsInCurrentBlock ? DT->getNode(BB)->getIDom()->getBlock()
+                            : Cur->getBlock();
+
+    auto Found = Blocks.find(BB);
+    if (Found == Blocks.end())
+      return;
+
+    Cur = Found->second.InitialClobberMA;
+    Stop = Found->second.ClobberMA;
+    // The next chain may start with the access just recorded; drop it so it
+    // does not get listed twice.
+    if (Cur == Clobbers.back())
+      Clobbers.pop_back();
+  }
+}
+
+/// Find the set of all the reaching memory definitions for the location
+/// referred to by the pointer operand of the given load instruction. Definitely
+/// aliasing memory reads are treated as definitions, for the purposes of this
+/// function.
+bool GVNPass::findReachingValuesForLoad(LoadInst *L,
+ SmallVectorImpl<ReachingMemVal> &Values,
+ MemorySSA &MSSA, AAResults &AAR) {
+ EarliestEscapeAnalysis EA(*DT, LI);
+ BatchAAResults AA(AAR, &EA);
+ BasicBlock *StartBlock = L->getParent();
+ bool IsInvariantLoad = L->hasMetadata(LLVMContext::MD_invariant_load);
+ MemoryAccess *ClobberMA = MSSA.getMemoryAccess(L)->getDefiningAccess();
+ const MemoryLocation Loc = MemoryLocation::get(L);
+
+ // Fast path for load tagged with !invariant.group.
+ if (L->hasMetadata(LLVMContext::MD_invariant_group)) {
+ if (Instruction *G = findInvariantGroupValue(L, *DT)) {
+ Values.emplace_back(
+ ReachingMemVal::getDef(getLoadStorePointerOperand(G), G));
+ return true;
+ }
+ }
+
+ // First off, look for a local dependency. Doing this allows us to avoid
+ // having to disambiguate between the parts of the initial basic block before
+ // and after the original load instruction (when entered from a backedge).
+ do {
+ // Scan users of the clobbering memory access.
+ if (auto RMV = scanMemoryAccessesUsers(
+ Loc, IsInvariantLoad, StartBlock,
+ SmallVector<MemoryAccess *, 1>{ClobberMA}, MSSA, AA, L)) {
+ Values.emplace_back(*RMV);
+ return true;
+ }
+
+ // Proceed visiting predecessors if the clobbering access is non-local or it
+ // is a MemoryPhi.
+ if (ClobberMA->getBlock() != StartBlock || isa<MemoryPhi>(ClobberMA))
+ break;
+
+ // Check if the clobber actually aliases the load location.
+ if (auto RMV = accessMayModifyLocation(ClobberMA, Loc, IsInvariantLoad,
+ StartBlock, MSSA, AA)) {
+ Values.emplace_back(*RMV);
+ return true;
+ }
+
+ // It may happen that the clobbering memory access does not actually
+ // clobber our load location, transition to its defining memory access.
+ ClobberMA = cast<MemoryUseOrDef>(ClobberMA)->getDefiningAccess();
+ } while (ClobberMA->getBlock() == StartBlock);
+
+ // Non-local speculations are not allowed under ASan or HWASan.
+ if (L->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
+ L->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
+ return false;
+
+ // Walk backwards through the CFG, collecting blocks along the way,
+ // terminating at blocks/instructions, which definitely define our memory
+ // location (perhaps in an unknown way). Start off by collecting the
+ // predecessors of the initial basic block as starting points for the walk.
+ DependencyBlockSet Blocks;
+ SmallVector<BasicBlock *, 16> InitialWorklist;
+ const DataLayout &DL = L->getModule()->getDataLayout();
+ if (!collectPredecessors(StartBlock,
+ PHITransAddr(L->getPointerOperand(), DL, AC),
+ ClobberMA, Blocks, InitialWorklist))
+ return false;
+
+ // Do a bottom-up depth-first search.
+ auto Worklist = InitialWorklist;
+ while (!Worklist.empty()) {
+ auto *BB = Worklist.pop_back_val();
+ DependencyBlockInfo &Info = Blocks.find(BB)->second;
+
+ // Phi-translation may have failed.
+ if (!Info.Addr.getAddr())
+ continue;
+
+ // If the clobbering memory access is in the current block and it indeed
+ // clobbers our load location, record the dependency and stop the
+ // traversal.
----------------
momchil-velikov wrote:
Do you mean the "stop traversal" part? It means don't go to the predecessors of the current block.
https://github.com/llvm/llvm-project/pull/152859
More information about the llvm-commits
mailing list