[llvm] [NewGVN][2/3] Load coercion between loads that have live-on-entry definitions (PR #68666)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 9 23:39:51 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Konstantina Mitropoulou (kmitropoulou)
<details>
<summary>Changes</summary>
In the following example, both %V1 and %V2 have live-on-entry definitions and
their memory locations overlap. After load coercion, the value of %V2 is
extracted from %V1 and the uses of %V2 are updated accordingly.
```
Before load coercion
BB1
%V1 = load <2 x i32>, ptr %P, align 1
%V2 = load i32, ptr %P, align 1
%V3 = add i32 %V2, 42
After load coercion
BB1
%V1 = load <2 x i32>, ptr %P, align 1
%0 = bitcast <2 x i32> %V1 to i64
%1 = trunc i64 %0 to i32
%V3 = add i32 %1, 42
```
---
Patch is 60.20 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/68666.diff
4 Files Affected:
- (modified) llvm/lib/Transforms/Scalar/NewGVN.cpp (+590-51)
- (added) llvm/test/Transforms/NewGVN/load_coercion_between_loads.ll (+424)
- (added) llvm/test/Transforms/NewGVN/load_coercion_between_store_and_load.ll (+341)
- (modified) llvm/test/Transforms/NewGVN/pr14166-xfail.ll (-1)
``````````diff
diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 19ac9526b5f88b6..140ec02572db7de 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -73,9 +73,11 @@
#include "llvm/Analysis/CFGPrinter.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InstructionPrecedenceTracking.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
@@ -154,6 +156,10 @@ static cl::opt<bool> EnableStoreRefinement("enable-store-refinement",
static cl::opt<bool> EnablePhiOfOps("enable-phi-of-ops", cl::init(true),
cl::Hidden);
+// Enables load coercion for non-constant values.
+static cl::opt<bool> EnableLoadCoercion("enable-load-coercion", cl::init(true),
+ cl::Hidden);
+
//===----------------------------------------------------------------------===//
// GVN Pass
//===----------------------------------------------------------------------===//
@@ -495,6 +501,7 @@ class NewGVN {
AssumptionCache *AC = nullptr;
const DataLayout &DL;
std::unique_ptr<PredicateInfo> PredInfo;
+ ImplicitControlFlowTracking *ICF = nullptr;
// These are the only two things the create* functions should have
// side-effects on due to allocating memory.
@@ -653,6 +660,16 @@ class NewGVN {
// Deletion info.
SmallPtrSet<Instruction *, 8> InstructionsToErase;
+ // Map each candidate load to its depending instructions.
+ mutable std::map<Value *, DenseSet<std::pair<Instruction *, BasicBlock *>>>
+ LoadCoercion;
+
+ // Keep newly generated loads.
+ SmallVector<Instruction *, 2> NewLoadsInLoadCoercion;
+
+ // Keep newly generated instructions.
+ SmallVector<Instruction *, 2> NewlyGeneratedInsns;
+
public:
NewGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,
TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA,
@@ -776,9 +793,9 @@ class NewGVN {
ExprResult checkExprResults(Expression *, Instruction *, Value *) const;
ExprResult performSymbolicEvaluation(Instruction *,
SmallPtrSetImpl<Value *> &) const;
- const Expression *performSymbolicLoadCoercion(Type *, Value *, LoadInst *,
- Instruction *,
- MemoryAccess *) const;
+ const Expression *createLoadExpAndUpdateMemUses(LoadInst *, Value *,
+ MemoryAccess *,
+ MemoryAccess *) const;
const Expression *performSymbolicLoadEvaluation(Instruction *) const;
const Expression *performSymbolicStoreEvaluation(Instruction *) const;
ExprResult performSymbolicCallEvaluation(Instruction *) const;
@@ -853,6 +870,7 @@ class NewGVN {
// Utilities.
void cleanupTables();
std::pair<unsigned, unsigned> assignDFSNumbers(BasicBlock *, unsigned);
+ unsigned updateDFSNumbers(unsigned);
void updateProcessedCount(const Value *V);
void verifyMemoryCongruency() const;
void verifyIterationSettled(Function &F);
@@ -893,6 +911,43 @@ class NewGVN {
// Debug counter info. When verifying, we have to reset the value numbering
// debug counter to the same state it started in to get the same results.
int64_t StartingVNCounter = 0;
+
+ // The following functions are used in load coercion:
+ // Try to add the load along with the depending instruction(s) in
+ // LoadCoercion map.
+ bool tryAddLoadDepInsnIntoLoadCoercionMap(LoadInst *, Instruction *,
+ BasicBlock *) const;
+ // Check if the candidate load can be optimized by another load which is also
+ // a live-on-entry definition and add it in LoadCoercion map.
+ bool findLiveOnEntryDependency(LoadInst *, LoadInst *, ArrayRef<BasicBlock *>,
+ bool) const;
+ // Collect the load instructions that can be optimized with load coercion.
+ // The filtering of the load instructions is based on the type of their
+ // memory access.
+ bool performSymbolicLoadCoercionForNonConstantMemoryDef(LoadInst *,
+ StoreInst *,
+ MemoryAccess *) const;
+ const Expression *performSymbolicLoadCoercionForConstantMemoryDef(
+ Type *, Value *, LoadInst *, Instruction *, MemoryAccess *) const;
+ bool performSymbolicLoadCoercionForLiveOnEntryDef(LoadInst *,
+ MemoryAccess *) const;
+ // Code generation for load coercion. Replaces the load with the right
+ // instruction or the right sequence of instructions.
+ bool implementLoadCoercion();
+ // Update MemorySSA with the load instructions that are emitted during load
+ // coercion.
+ void updateMemorySSA(Instruction *, Instruction *);
+ // Extract the value that will replace the load from the depending
+ // instruction.
+ Value *getExtractedValue(LoadInst *, Instruction *);
+ // If load coercion is successful, the uses of the optimized load might need
+ // to be added to new congruence classes in order to optimize the code
+ // further. For this reason, we run value numbering for all the uses of the
+ // optimized load. If load coercion has failed, then we need to add the load
+ // (and its uses) to the right congruence class.
+ void updateUsesAfterLoadCoercionImpl(LoadInst *,
+ SmallVectorImpl<Instruction *> &);
+ void updateUsesAfterLoadCoercion(LoadInst *, Value *);
};
} // end anonymous namespace
@@ -1439,12 +1494,249 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) const {
return createStoreExpression(SI, StoreAccess);
}
+// A load can have one or more dependencies as the following examples show:
+//
+// Example 1:
+// BB1:
+// ...
+// store i32 %V1, ptr %P
+// ...
+// %V2 = load i32, ptr %P
+// ...
+//
+// Example 2:
+// BB1: BB2:
+// store i32 %V1, ptr %P %V2 = load i32, ptr %P
+// br label %BB3 br label %BB3
+// \ /
+// BB3:
+// %V3 = load i32, ptr %P
+//
+// In the first example, the load (%V2) has only one dependency. In the second
+// example, the load (%V3) has two dependencies. Therefore, we add the load
+// along with its two dependencies in LoadCoercion map. However, this is not
+// always the case as it is shown below:
+//
+// Example 3:
+// BB1:
+// %V1 = load <4 x i32>, ptr %P
+// br i1 %cond, label %BB2, label %BB3
+// / \
+// BB2: BB3:
+// %V2 = load <2 x i32>, ptr %P %V3 = load i32, ptr %P
+// br label %BB4 br label %BB4
+// \ /
+// BB4:
+// %V4 = load i32, ptr %P
+//
+// The %V4 load can be optimized by any of the loads (%V1, %V2, %V3). The loads
+// %V2 and %V3 can also be optimized by %V1. For this reason, we need to do an
+// extra check before we add the load in the map. Hence, we check if the load is
+// already in the map and if the existing depending instruction dominates the
+// current depending instruction. If so, then we do not add the new depending
+// instruction in LoadCoercion map. If the current depending instruction
+// dominates the existing depending instruction, then we remove the existing
+// depending instruction from LoadCoercion map and we add the current depending
+// instruction. In Example 3, the %V4 load has only one dependency (%V1) and we
+// add only this one in LoadCoercion map.
+bool NewGVN::tryAddLoadDepInsnIntoLoadCoercionMap(
+ LoadInst *LI, Instruction *CurrentDepI, BasicBlock *CurrentDepIBB) const {
+ // Can't forward from non-atomic to atomic without violating memory model.
+ if (LI->isAtomic() > CurrentDepI->isAtomic())
+ return false;
+
+ if (auto *DepLI = dyn_cast<LoadInst>(CurrentDepI))
+ if (LI->getAlign() < DepLI->getAlign())
+ return false;
+
+ if (auto *DepSI = dyn_cast<StoreInst>(CurrentDepI))
+ if (LI->getAlign() < DepSI->getAlign())
+ return false;
+
+ // Check if LI already exists in LoadCoercion map.
+ auto It = LoadCoercion.find(LI);
+ if (It != LoadCoercion.end()) {
+ auto &ExistingDepInsns = It->second;
+ // Iterate over all the existing depending instructions of LI.
+ for (auto &P : llvm::make_early_inc_range(ExistingDepInsns)) {
+ Instruction *ExistingDepI = P.first;
+ if (MSSAWalker->getClobberingMemoryAccess(getMemoryAccess(CurrentDepI)) ==
+ MSSAWalker->getClobberingMemoryAccess(
+ getMemoryAccess(ExistingDepI)) &&
+ isa<LoadInst>(ExistingDepI) && isa<LoadInst>(CurrentDepI)) {
+ // If the existing depending instruction dominates the current depending
+ // instruction, then we should not add the current depending instruction
+ // in LoadCoercion map (Example 3).
+ if (DT->dominates(ExistingDepI, CurrentDepI))
+ return true;
+
+ // If the current depending instruction dominates the existing one, then
+ // we remove the existing depending instruction from the LoadCoercion
+ // map. Next, we add the current depending instruction in LoadCoercion
+ // map.
+ if (DT->dominates(CurrentDepI, ExistingDepI))
+ ExistingDepInsns.erase(P);
+ }
+ }
+ }
+ // Add the load and the corresponding depending instruction in LoadCoercion
+ // map.
+ LoadCoercion[LI].insert(std::make_pair(CurrentDepI, CurrentDepIBB));
+ return true;
+}
+
+// Check if it is possible to apply load coercion between CandidateLI and
+// DependingLoad.
+bool NewGVN::findLiveOnEntryDependency(LoadInst *CandidateLI,
+ LoadInst *DependingLoad,
+ ArrayRef<BasicBlock *> DependingBlocks,
+ bool IsMemoryPhiDep) const {
+ int Offset = -1;
+
+ if (!DependingLoad || CandidateLI == DependingLoad ||
+ DependingLoad->getNumUses() == 0)
+ return false;
+
+ BasicBlock *DependingLoadBB = DependingLoad->getParent();
+ if (!ReachableBlocks.count(DependingLoadBB) ||
+ ICF->isDominatedByICFIFromSameBlock(CandidateLI))
+ return false;
+
+ if (InstructionsToErase.count(DependingLoad))
+ return false;
+
+ // We do not look deep in the CFG. We consider either instructions that
+ // dominate CandidateLI or instructions that are in one of the predecessors of
+ // CandidateLI.
+ if (DT->dominates(DependingLoad, CandidateLI))
+ Offset = analyzeLoadFromClobberingLoad(CandidateLI->getType(),
+ CandidateLI->getPointerOperand(),
+ DependingLoad, DL);
+ else {
+ BasicBlock *CandidateLIBB = CandidateLI->getParent();
+ auto It1 = llvm::find(DependingBlocks, CandidateLIBB);
+ auto It2 = llvm::find(DependingBlocks, DependingLoadBB);
+ auto Ite = DependingBlocks.end();
+ if (It1 == Ite && It2 != Ite && !isBackedge(DependingLoadBB, CandidateLIBB))
+ Offset = analyzeLoadFromClobberingLoad(CandidateLI->getType(),
+ CandidateLI->getPointerOperand(),
+ DependingLoad, DL);
+ }
+
+ bool IsLoadCoercionCandidate = false;
+ if (Offset >= 0) {
+ // If the candidate load depends on a MemoryPhi, then we do not consider the
+ // parent block of the depending instruction, but instead it is more
+ // convenient to consider the basic block of the MemoryPhi from which the
+ // value comes e.g.:
+ // BB1:
+ // %V1 = load i32, ptr %P
+ // br i1 %Cond, label %BB2, label %BB3
+ // / \
+ // BB2: BB3:
+ // store i32 100, ptr %P br label %BB4
+ // br label %BB4 /
+ // \ /
+ // BB4:
+ // %V2 = load i32, ptr %P
+ //
+ BasicBlock *BB = IsMemoryPhiDep ? DependingBlocks.back() : DependingLoadBB;
+ IsLoadCoercionCandidate |=
+ tryAddLoadDepInsnIntoLoadCoercionMap(CandidateLI, DependingLoad, BB);
+ }
+ return IsLoadCoercionCandidate;
+}
+
+// Find load coercion opportunities between instructions with live on entry
+// definitions.
+bool NewGVN::performSymbolicLoadCoercionForLiveOnEntryDef(
+ LoadInst *LI, MemoryAccess *DefiningAccess) const {
+ bool IsLoadCoercionCandidate = false;
+ for (const auto &U : MSSA->getLiveOnEntryDef()->uses()) {
+ if (auto *MemUse = dyn_cast<MemoryUse>(U.getUser())) {
+ // TODO: Add support for calls.
+ LoadInst *DependingLoad = dyn_cast<LoadInst>(MemUse->getMemoryInst());
+ if (!DependingLoad || LI == DependingLoad)
+ continue;
+
+ // If the two instructions have the same type, then there is a load
+ // coercion opportunity only if the LI and the DependingLoad are in
+ // different basic blocks and the basic block of the DependingLoad is one
+ // of the predecessors of the basic block of the LI. For any other case,
+ // the LI will be eliminated by adding the two loads in the same
+ // congruence class.
+ //
+ // Example 1: Here, we do not need to apply load coercion. The two loads
+ // will be added in the same congruence class and %V2 will be eliminated.
+ //
+ // BB1:
+ // ...
+ // %V1 = load i32, ptr %P
+ // br label %BB2
+ //
+ // BB2
+ // ...
+ // %V2 = load i32, ptr %P
+ // ...
+ //
+ // Example 2: Here, %V2 can be replaced by a phi node.
+ // BB1: BB2:
+ // %V1 = load <2 x i32>, ptr %P br label %BB3
+ // br label %BB3 /
+ // \ /
+ // BB3:
+ // %V2 = load i32, ptr %P
+ //
+ // Hence, the code will become:
+ // BB1: BB2:
+ // %V1 = load <2 x i32>, ptr %P %V2' = load i32, ptr %P
+ // %0 = bitcast <2 x i32> %V1 to i64 br label %BB3
+ // %1 = trunc i64 %0 to i32 /
+ // br label %BB3 /
+ // \ /
+ // BB3:
+ // %V2 = phi i32 [ %1, %BB1], [ %V2', %BB2 ]
+ //
+ if (DependingLoad->getType() == LI->getType() &&
+ (DT->dominates(DependingLoad, LI) ||
+ LI->getParent() == DependingLoad->getParent()))
+ continue;
+
+ SmallVector<BasicBlock *, 2> Preds;
+ for (auto *BB : predecessors(LI->getParent()))
+ Preds.push_back(BB);
+ IsLoadCoercionCandidate |=
+ findLiveOnEntryDependency(LI, DependingLoad, Preds, false);
+ }
+ }
+ return IsLoadCoercionCandidate;
+}
+
+// Find load coercion opportunities between load (LI) and store instructions
+// (DepSI).
+bool NewGVN::performSymbolicLoadCoercionForNonConstantMemoryDef(
+ LoadInst *LI, StoreInst *DepSI, MemoryAccess *DefiningAccess) const {
+ Type *LoadType = LI->getType();
+ bool IsLoadCoercionCandidate = false;
+ if (LI->isAtomic() > DepSI->isAtomic() ||
+ LoadType == DepSI->getValueOperand()->getType())
+ return false;
+
+ int Offset = analyzeLoadFromClobberingStore(
+ LoadType, lookupOperandLeader(LI->getPointerOperand()), DepSI, DL);
+ if (Offset >= 0) {
+ IsLoadCoercionCandidate |=
+ tryAddLoadDepInsnIntoLoadCoercionMap(LI, DepSI, DepSI->getParent());
+ }
+
+ return IsLoadCoercionCandidate;
+}
+
// See if we can extract the value of a loaded pointer from a load, a store, or
// a memory instruction.
-const Expression *
-NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr,
- LoadInst *LI, Instruction *DepInst,
- MemoryAccess *DefiningAccess) const {
+const Expression *NewGVN::performSymbolicLoadCoercionForConstantMemoryDef(
+ Type *LoadType, Value *LoadPtr, LoadInst *LI, Instruction *DepInst,
+ MemoryAccess *DefiningAccess) const {
assert((!LI || LI->isSimple()) && "Not a simple load");
if (auto *DepSI = dyn_cast<StoreInst>(DepInst)) {
// Can't forward from non-atomic to atomic without violating memory model.
@@ -1464,21 +1756,6 @@ NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr,
}
}
}
- } else if (auto *DepLI = dyn_cast<LoadInst>(DepInst)) {
- // Can't forward from non-atomic to atomic without violating memory model.
- if (LI->isAtomic() > DepLI->isAtomic())
- return nullptr;
- int Offset = analyzeLoadFromClobberingLoad(LoadType, LoadPtr, DepLI, DL);
- if (Offset >= 0) {
- // We can coerce a constant load into a load.
- if (auto *C = dyn_cast<Constant>(lookupOperandLeader(DepLI)))
- if (auto *PossibleConstant =
- getConstantValueForLoad(C, Offset, LoadType, DL)) {
- LLVM_DEBUG(dbgs() << "Coercing load from load " << *LI
- << " to constant " << *PossibleConstant << "\n");
- return createConstantExpression(PossibleConstant);
- }
- }
} else if (auto *DepMI = dyn_cast<MemIntrinsic>(DepInst)) {
int Offset = analyzeLoadFromClobberingMemInst(LoadType, LoadPtr, DepMI, DL);
if (Offset >= 0) {
@@ -1510,11 +1787,24 @@ NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr,
return createConstantExpression(UndefValue::get(LoadType));
} else if (auto *InitVal =
getInitialValueOfAllocation(DepInst, TLI, LoadType))
- return createConstantExpression(InitVal);
+ return createConstantExpression(InitVal);
return nullptr;
}
+const Expression *
+NewGVN::createLoadExpAndUpdateMemUses(LoadInst *LI, Value *LoadAddressLeader,
+ MemoryAccess *OriginalAccess,
+ MemoryAccess *DefiningAccess) const {
+ const auto *LE = createLoadExpression(LI->getType(), LoadAddressLeader, LI,
+ DefiningAccess);
+ // If our MemoryLeader is not our defining access, add a use to the
+ // MemoryLeader, so that we get reprocessed when it changes.
+ if (LE->getMemoryLeader() != DefiningAccess)
+ addMemoryUsers(LE->getMemoryLeader(), OriginalAccess);
+ return LE;
+}
+
const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) const {
auto *LI = cast<LoadInst>(I);
@@ -1531,30 +1821,62 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) const {
MemoryAccess *DefiningAccess =
MSSAWalker->getClobberingMemoryAccess(OriginalAccess);
- if (!MSSA->isLiveOnEntryDef(DefiningAccess)) {
- if (auto *MD = dyn_cast<MemoryDef>(DefiningAccess)) {
- Instruction *DefiningInst = MD->getMemoryInst();
- // If the defining instruction is not reachable, replace with poison.
- if (!ReachableBlocks.count(DefiningInst->getParent()))
- return createConstantExpression(PoisonValue::get(LI->getType()));
- // This will handle stores and memory insts. We only do if it the
- // defining access has a different type, or it is a pointer produced by
- // certain memory operations that cause the memory to have a fixed value
- // (IE things like calloc).
- if (const auto *CoercionResult =
- performSymbolicLoadCoercion(LI->getType(), LoadAddressLeader, LI,
- DefiningInst, DefiningAccess))
- return CoercionResult;
+ // Do not apply load coercion to load instructions that are candidates of
+ // phi-of-ops optimization.
+ if (TempToBlock.count(LI))
+ return createLoadExpAndUpdateMemUses(LI, LoadAddressLeade...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/68666
More information about the llvm-commits
mailing list