[llvm-branch-commits] [llvm] PR for llvm/llvm-project#79175 (PR #80274)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Feb 5 11:42:27 PST 2024
https://github.com/llvmbot updated https://github.com/llvm/llvm-project/pull/80274
>From aa6980841e587eba9c98bf54c51f5414f8a15871 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Wed, 24 Jan 2024 12:33:57 +0100
Subject: [PATCH 1/3] [Loads] Use BatchAAResults for available value APIs
(NFCI)
This allows caching AA queries both within and across the calls,
and enables us to use a custom AAQI configuration.
(cherry picked from commit 89dae798cc77789a43e9a60173f647dae03a65fe)
---
llvm/include/llvm/Analysis/Loads.h | 12 ++++++------
llvm/lib/Analysis/Lint.cpp | 3 ++-
llvm/lib/Analysis/Loads.cpp | 9 ++++-----
.../InstCombine/InstCombineLoadStoreAlloca.cpp | 3 ++-
llvm/lib/Transforms/Scalar/JumpThreading.cpp | 11 ++++++-----
5 files changed, 20 insertions(+), 18 deletions(-)
diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h
index 2880ed33a34cbc..0926093bba99de 100644
--- a/llvm/include/llvm/Analysis/Loads.h
+++ b/llvm/include/llvm/Analysis/Loads.h
@@ -18,7 +18,7 @@
namespace llvm {
-class AAResults;
+class BatchAAResults;
class AssumptionCache;
class DataLayout;
class DominatorTree;
@@ -129,11 +129,10 @@ extern cl::opt<unsigned> DefMaxInstsToScan;
/// location in memory, as opposed to the value operand of a store.
///
/// \returns The found value, or nullptr if no value is found.
-Value *FindAvailableLoadedValue(LoadInst *Load,
- BasicBlock *ScanBB,
+Value *FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
BasicBlock::iterator &ScanFrom,
unsigned MaxInstsToScan = DefMaxInstsToScan,
- AAResults *AA = nullptr,
+ BatchAAResults *AA = nullptr,
bool *IsLoadCSE = nullptr,
unsigned *NumScanedInst = nullptr);
@@ -141,7 +140,8 @@ Value *FindAvailableLoadedValue(LoadInst *Load,
/// FindAvailableLoadedValue() for the case where we are not interested in
/// finding the closest clobbering instruction if no available load is found.
/// This overload cannot be used to scan across multiple blocks.
-Value *FindAvailableLoadedValue(LoadInst *Load, AAResults &AA, bool *IsLoadCSE,
+Value *FindAvailableLoadedValue(LoadInst *Load, BatchAAResults &AA,
+ bool *IsLoadCSE,
unsigned MaxInstsToScan = DefMaxInstsToScan);
/// Scan backwards to see if we have the value of the given pointer available
@@ -170,7 +170,7 @@ Value *FindAvailableLoadedValue(LoadInst *Load, AAResults &AA, bool *IsLoadCSE,
Value *findAvailablePtrLoadStore(const MemoryLocation &Loc, Type *AccessTy,
bool AtLeastAtomic, BasicBlock *ScanBB,
BasicBlock::iterator &ScanFrom,
- unsigned MaxInstsToScan, AAResults *AA,
+ unsigned MaxInstsToScan, BatchAAResults *AA,
bool *IsLoadCSE, unsigned *NumScanedInst);
/// Returns true if a pointer value \p A can be replace with another pointer
diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp
index 1ebc593016bc0d..16635097d20afe 100644
--- a/llvm/lib/Analysis/Lint.cpp
+++ b/llvm/lib/Analysis/Lint.cpp
@@ -657,11 +657,12 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
BasicBlock::iterator BBI = L->getIterator();
BasicBlock *BB = L->getParent();
SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
+ BatchAAResults BatchAA(*AA);
for (;;) {
if (!VisitedBlocks.insert(BB).second)
break;
if (Value *U =
- FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, AA))
+ FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, &BatchAA))
return findValueImpl(U, OffsetOk, Visited);
if (BBI != BB->begin())
break;
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 97d21db86abf28..6bf0d2f56eb4eb 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -450,11 +450,10 @@ llvm::DefMaxInstsToScan("available-load-scan-limit", cl::init(6), cl::Hidden,
"to scan backward from a given instruction, when searching for "
"available loaded value"));
-Value *llvm::FindAvailableLoadedValue(LoadInst *Load,
- BasicBlock *ScanBB,
+Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
BasicBlock::iterator &ScanFrom,
unsigned MaxInstsToScan,
- AAResults *AA, bool *IsLoad,
+ BatchAAResults *AA, bool *IsLoad,
unsigned *NumScanedInst) {
// Don't CSE load that is volatile or anything stronger than unordered.
if (!Load->isUnordered())
@@ -583,7 +582,7 @@ static Value *getAvailableLoadStore(Instruction *Inst, const Value *Ptr,
Value *llvm::findAvailablePtrLoadStore(
const MemoryLocation &Loc, Type *AccessTy, bool AtLeastAtomic,
BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan,
- AAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst) {
+ BatchAAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst) {
if (MaxInstsToScan == 0)
MaxInstsToScan = ~0U;
@@ -664,7 +663,7 @@ Value *llvm::findAvailablePtrLoadStore(
return nullptr;
}
-Value *llvm::FindAvailableLoadedValue(LoadInst *Load, AAResults &AA,
+Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BatchAAResults &AA,
bool *IsLoadCSE,
unsigned MaxInstsToScan) {
const DataLayout &DL = Load->getModule()->getDataLayout();
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index bb2a77daa60a76..1254a050027a45 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1032,7 +1032,8 @@ Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
// where there are several consecutive memory accesses to the same location,
// separated by a few arithmetic operations.
bool IsLoadCSE = false;
- if (Value *AvailableVal = FindAvailableLoadedValue(&LI, *AA, &IsLoadCSE)) {
+ BatchAAResults BatchAA(*AA);
+ if (Value *AvailableVal = FindAvailableLoadedValue(&LI, BatchAA, &IsLoadCSE)) {
if (IsLoadCSE)
combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 8603c5cf9c022c..d7d689f58a0708 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1260,8 +1260,9 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
// the entry to its block.
BasicBlock::iterator BBIt(LoadI);
bool IsLoadCSE;
+ BatchAAResults BatchAA(*AA);
if (Value *AvailableVal = FindAvailableLoadedValue(
- LoadI, LoadBB, BBIt, DefMaxInstsToScan, AA, &IsLoadCSE)) {
+ LoadI, LoadBB, BBIt, DefMaxInstsToScan, &BatchAA, &IsLoadCSE)) {
// If the value of the load is locally available within the block, just use
// it. This frequently occurs for reg2mem'd allocas.
@@ -1322,9 +1323,9 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
MemoryLocation Loc(LoadedPtr->DoPHITranslation(LoadBB, PredBB),
LocationSize::precise(DL.getTypeStoreSize(AccessTy)),
AATags);
- PredAvailable = findAvailablePtrLoadStore(Loc, AccessTy, LoadI->isAtomic(),
- PredBB, BBIt, DefMaxInstsToScan,
- AA, &IsLoadCSE, &NumScanedInst);
+ PredAvailable = findAvailablePtrLoadStore(
+ Loc, AccessTy, LoadI->isAtomic(), PredBB, BBIt, DefMaxInstsToScan,
+ &BatchAA, &IsLoadCSE, &NumScanedInst);
// If PredBB has a single predecessor, continue scanning through the
// single predecessor.
@@ -1336,7 +1337,7 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
BBIt = SinglePredBB->end();
PredAvailable = findAvailablePtrLoadStore(
Loc, AccessTy, LoadI->isAtomic(), SinglePredBB, BBIt,
- (DefMaxInstsToScan - NumScanedInst), AA, &IsLoadCSE,
+ (DefMaxInstsToScan - NumScanedInst), &BatchAA, &IsLoadCSE,
&NumScanedInst);
}
}
>From a581690c57d153f329ded71004a8616b93cb88ca Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Wed, 24 Jan 2024 15:03:24 +0100
Subject: [PATCH 2/3] [JumpThreading] Add test for #79175 (NFC)
(cherry picked from commit 7143b451d71fe314730f7610d7908e3b9611815c)
---
llvm/test/Transforms/JumpThreading/pr79175.ll | 64 +++++++++++++++++++
1 file changed, 64 insertions(+)
create mode 100644 llvm/test/Transforms/JumpThreading/pr79175.ll
diff --git a/llvm/test/Transforms/JumpThreading/pr79175.ll b/llvm/test/Transforms/JumpThreading/pr79175.ll
new file mode 100644
index 00000000000000..6815aabb26dfc6
--- /dev/null
+++ b/llvm/test/Transforms/JumpThreading/pr79175.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -passes=jump-threading < %s | FileCheck %s
+
+ at f = external global i32
+
+; Make sure the value of @f is reloaded prior to the final comparison.
+; FIXME: This is a miscompile.
+define i32 @test(i64 %idx, i32 %val) {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: i64 [[IDX:%.*]], i32 [[VAL:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[IDX]], 1
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[RETURN:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[F:%.*]] = load i32, ptr @f, align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[F]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[COND_END_THREAD:%.*]], label [[COND_END:%.*]]
+; CHECK: cond.end:
+; CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt i32 [[VAL]], 0
+; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[CMP_I]]
+; CHECK-NEXT: br i1 [[COND_FR]], label [[COND_END_THREAD]], label [[TMP0:%.*]]
+; CHECK: cond.end.thread:
+; CHECK-NEXT: [[F_RELOAD_PR:%.*]] = load i32, ptr @f, align 4
+; CHECK-NEXT: br label [[TMP0]]
+; CHECK: 0:
+; CHECK-NEXT: [[F_RELOAD:%.*]] = phi i32 [ [[F]], [[COND_END]] ], [ [[F_RELOAD_PR]], [[COND_END_THREAD]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ 0, [[COND_END_THREAD]] ], [ [[VAL]], [[COND_END]] ]
+; CHECK-NEXT: [[F_IDX:%.*]] = getelementptr inbounds i32, ptr @f, i64 [[IDX]]
+; CHECK-NEXT: store i32 [[TMP1]], ptr [[F_IDX]], align 4
+; CHECK-NEXT: [[CMP3:%.*]] = icmp slt i32 [[F_RELOAD]], 1
+; CHECK-NEXT: br i1 [[CMP3]], label [[RETURN2:%.*]], label [[RETURN]]
+; CHECK: return:
+; CHECK-NEXT: ret i32 0
+; CHECK: return2:
+; CHECK-NEXT: ret i32 1
+;
+entry:
+ %cmp = icmp slt i64 %idx, 1
+ br i1 %cmp, label %for.body, label %return
+
+for.body:
+ %f = load i32, ptr @f, align 4
+ %cmp1 = icmp eq i32 %f, 0
+ br i1 %cmp1, label %cond.end, label %cond.false
+
+cond.false:
+ br label %cond.end
+
+cond.end:
+ %phi = phi i32 [ %val, %cond.false ], [ 1, %for.body ]
+ %cmp.i = icmp sgt i32 %phi, 0
+ %sel = select i1 %cmp.i, i32 0, i32 %phi
+ %f.idx = getelementptr inbounds i32, ptr @f, i64 %idx
+ store i32 %sel, ptr %f.idx, align 4
+ %f.reload = load i32, ptr @f, align 4
+ %cmp3 = icmp slt i32 %f.reload, 1
+ br i1 %cmp3, label %return2, label %return
+
+return:
+ ret i32 0
+
+return2:
+ ret i32 1
+}
>From 28879ab8276e7237bfc86f4c7d7890fd4311d334 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Wed, 31 Jan 2024 15:23:53 +0100
Subject: [PATCH 3/3] [AA][JumpThreading] Don't use DomTree for AA in
JumpThreading (#79294)
JumpThreading may perform AA queries while the dominator tree is not up
to date, which may result in miscompilations.
Fix this by adding a new AAQI option to disable the use of the dominator
tree in BasicAA.
Fixes https://github.com/llvm/llvm-project/issues/79175.
(cherry picked from commit 4f32f5d5720fbef06672714a62376f236a36aef5)
---
llvm/include/llvm/Analysis/AliasAnalysis.h | 7 +++++++
llvm/include/llvm/Analysis/BasicAliasAnalysis.h | 14 ++++++++++----
llvm/lib/Analysis/BasicAliasAnalysis.cpp | 6 ++++--
llvm/lib/Transforms/Scalar/JumpThreading.cpp | 2 ++
llvm/test/Transforms/JumpThreading/pr79175.ll | 4 +---
5 files changed, 24 insertions(+), 9 deletions(-)
diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h
index d6f732d35fd4cd..e8e4f491be5a3d 100644
--- a/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -287,6 +287,10 @@ class AAQueryInfo {
/// store %l, ...
bool MayBeCrossIteration = false;
+ /// Whether alias analysis is allowed to use the dominator tree, for use by
+ /// passes that lazily update the DT while performing AA queries.
+ bool UseDominatorTree = true;
+
AAQueryInfo(AAResults &AAR, CaptureInfo *CI) : AAR(AAR), CI(CI) {}
};
@@ -668,6 +672,9 @@ class BatchAAResults {
void enableCrossIterationMode() {
AAQI.MayBeCrossIteration = true;
}
+
+ /// Disable the use of the dominator tree during alias analysis queries.
+ void disableDominatorTree() { AAQI.UseDominatorTree = false; }
};
/// Temporary typedef for legacy code that uses a generic \c AliasAnalysis
diff --git a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
index afc1811239f283..7eca82729430dd 100644
--- a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
@@ -43,20 +43,26 @@ class BasicAAResult : public AAResultBase {
const Function &F;
const TargetLibraryInfo &TLI;
AssumptionCache &AC;
- DominatorTree *DT;
+ /// Use getDT() instead of accessing this member directly, in order to
+ /// respect the AAQI.UseDominatorTree option.
+ DominatorTree *DT_;
+
+ DominatorTree *getDT(const AAQueryInfo &AAQI) const {
+ return AAQI.UseDominatorTree ? DT_ : nullptr;
+ }
public:
BasicAAResult(const DataLayout &DL, const Function &F,
const TargetLibraryInfo &TLI, AssumptionCache &AC,
DominatorTree *DT = nullptr)
- : DL(DL), F(F), TLI(TLI), AC(AC), DT(DT) {}
+ : DL(DL), F(F), TLI(TLI), AC(AC), DT_(DT) {}
BasicAAResult(const BasicAAResult &Arg)
: AAResultBase(Arg), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI), AC(Arg.AC),
- DT(Arg.DT) {}
+ DT_(Arg.DT_) {}
BasicAAResult(BasicAAResult &&Arg)
: AAResultBase(std::move(Arg)), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI),
- AC(Arg.AC), DT(Arg.DT) {}
+ AC(Arg.AC), DT_(Arg.DT_) {}
/// Handle invalidation events in the new pass manager.
bool invalidate(Function &Fn, const PreservedAnalyses &PA,
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 3178e2d2781674..1028b52a79123f 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -89,7 +89,7 @@ bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA,
// may be created without handles to some analyses and in that case don't
// depend on them.
if (Inv.invalidate<AssumptionAnalysis>(Fn, PA) ||
- (DT && Inv.invalidate<DominatorTreeAnalysis>(Fn, PA)))
+ (DT_ && Inv.invalidate<DominatorTreeAnalysis>(Fn, PA)))
return true;
// Otherwise this analysis result remains valid.
@@ -1063,6 +1063,7 @@ AliasResult BasicAAResult::aliasGEP(
: AliasResult::MayAlias;
}
+ DominatorTree *DT = getDT(AAQI);
DecomposedGEP DecompGEP1 = DecomposeGEPExpression(GEP1, DL, &AC, DT);
DecomposedGEP DecompGEP2 = DecomposeGEPExpression(V2, DL, &AC, DT);
@@ -1556,6 +1557,7 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
const Value *HintO1 = getUnderlyingObject(Hint1);
const Value *HintO2 = getUnderlyingObject(Hint2);
+ DominatorTree *DT = getDT(AAQI);
auto ValidAssumeForPtrContext = [&](const Value *Ptr) {
if (const Instruction *PtrI = dyn_cast<Instruction>(Ptr)) {
return isValidAssumeForContext(Assume, PtrI, DT,
@@ -1735,7 +1737,7 @@ bool BasicAAResult::isValueEqualInPotentialCycles(const Value *V,
if (!Inst || Inst->getParent()->isEntryBlock())
return true;
- return isNotInCycle(Inst, DT, /*LI*/ nullptr);
+ return isNotInCycle(Inst, getDT(AAQI), /*LI*/ nullptr);
}
/// Computes the symbolic difference between two de-composed GEPs.
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index d7d689f58a0708..87c01ead634ff8 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1261,6 +1261,8 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
BasicBlock::iterator BBIt(LoadI);
bool IsLoadCSE;
BatchAAResults BatchAA(*AA);
+ // The dominator tree is updated lazily and may not be valid at this point.
+ BatchAA.disableDominatorTree();
if (Value *AvailableVal = FindAvailableLoadedValue(
LoadI, LoadBB, BBIt, DefMaxInstsToScan, &BatchAA, &IsLoadCSE)) {
// If the value of the load is locally available within the block, just use
diff --git a/llvm/test/Transforms/JumpThreading/pr79175.ll b/llvm/test/Transforms/JumpThreading/pr79175.ll
index 6815aabb26dfc6..2c7ee0770cdc73 100644
--- a/llvm/test/Transforms/JumpThreading/pr79175.ll
+++ b/llvm/test/Transforms/JumpThreading/pr79175.ll
@@ -4,7 +4,6 @@
@f = external global i32
; Make sure the value of @f is reloaded prior to the final comparison.
-; FIXME: This is a miscompile.
define i32 @test(i64 %idx, i32 %val) {
; CHECK-LABEL: define i32 @test(
; CHECK-SAME: i64 [[IDX:%.*]], i32 [[VAL:%.*]]) {
@@ -20,13 +19,12 @@ define i32 @test(i64 %idx, i32 %val) {
; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[CMP_I]]
; CHECK-NEXT: br i1 [[COND_FR]], label [[COND_END_THREAD]], label [[TMP0:%.*]]
; CHECK: cond.end.thread:
-; CHECK-NEXT: [[F_RELOAD_PR:%.*]] = load i32, ptr @f, align 4
; CHECK-NEXT: br label [[TMP0]]
; CHECK: 0:
-; CHECK-NEXT: [[F_RELOAD:%.*]] = phi i32 [ [[F]], [[COND_END]] ], [ [[F_RELOAD_PR]], [[COND_END_THREAD]] ]
; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ 0, [[COND_END_THREAD]] ], [ [[VAL]], [[COND_END]] ]
; CHECK-NEXT: [[F_IDX:%.*]] = getelementptr inbounds i32, ptr @f, i64 [[IDX]]
; CHECK-NEXT: store i32 [[TMP1]], ptr [[F_IDX]], align 4
+; CHECK-NEXT: [[F_RELOAD:%.*]] = load i32, ptr @f, align 4
; CHECK-NEXT: [[CMP3:%.*]] = icmp slt i32 [[F_RELOAD]], 1
; CHECK-NEXT: br i1 [[CMP3]], label [[RETURN2:%.*]], label [[RETURN]]
; CHECK: return:
More information about the llvm-branch-commits
mailing list