[llvm] 30bb5be - [CSSPGO] Unblock optimizations with pseudo probe instrumentation part 2.
Hongtao Yu via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 26 16:52:40 PDT 2021
Author: Hongtao Yu
Date: 2021-04-26T16:52:33-07:00
New Revision: 30bb5be38908b0006ed94124515e43774ee37915
URL: https://github.com/llvm/llvm-project/commit/30bb5be38908b0006ed94124515e43774ee37915
DIFF: https://github.com/llvm/llvm-project/commit/30bb5be38908b0006ed94124515e43774ee37915.diff
LOG: [CSSPGO] Unblock optimizations with pseudo probe instrumentation part 2.
As a follow-up to D95982, this patch continues unblocking optimizations that are blocked by pseudo probe instrumentation.
The optimizations unblocked are:
- In-block load propagation.
- In-block dead store elimination
- Memory copy optimization that turns stores to consecutive memories into a memset.
These optimizations are local to a block, so they shouldn't affect the profile quality.
Reviewed By: wmi
Differential Revision: https://reviews.llvm.org/D100075
Added:
llvm/test/Transforms/SampleProfile/pseudo-probe-memset.ll
Modified:
llvm/lib/Analysis/Loads.cpp
llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
llvm/lib/Transforms/Scalar/Sink.cpp
llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll
Removed:
################################################################################
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index da4ee7d5228e1..1c55f485aa763 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -532,7 +532,7 @@ Value *llvm::findAvailablePtrLoadStore(
// We must ignore debug info directives when counting (otherwise they
// would affect codegen).
Instruction *Inst = &*--ScanFrom;
- if (isa<DbgInfoIntrinsic>(Inst))
+ if (Inst->isDebugOrPseudoInst())
continue;
// Restore ScanFrom to expected value in case next test succeeds
@@ -620,7 +620,7 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, AAResults &AA,
SmallVector<Instruction *> MustNotAliasInsts;
for (Instruction &Inst : make_range(++Load->getReverseIterator(),
ScanBB->rend())) {
- if (isa<DbgInfoIntrinsic>(&Inst))
+ if (Inst.isDebugOrPseudoInst())
continue;
if (MaxInstsToScan-- == 0)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 77d91055688b5..a1cf8e40e7a48 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1396,7 +1396,7 @@ Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) {
--BBI;
// Don't count debug info directives, lest they affect codegen,
// and we skip pointer-to-pointer bitcasts, which are NOPs.
- if (isa<DbgInfoIntrinsic>(BBI) ||
+ if (BBI->isDebugOrPseudoInst() ||
(isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
ScanInsts++;
continue;
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 83d475d90b85c..29f43f1ac3010 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -399,6 +399,13 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
}
}
+ // Calls that only access inaccessible memory do not block merging
+ // accessible stores.
+ if (auto *CB = dyn_cast<CallBase>(BI)) {
+ if (CB->onlyAccessesInaccessibleMemory())
+ continue;
+ }
+
if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
// If the instruction is readnone, ignore it, otherwise bail out. We
// don't even allow readonly here because we don't want something like:
diff --git a/llvm/lib/Transforms/Scalar/Sink.cpp b/llvm/lib/Transforms/Scalar/Sink.cpp
index 89cfbe384be44..8600aacdb0561 100644
--- a/llvm/lib/Transforms/Scalar/Sink.cpp
+++ b/llvm/lib/Transforms/Scalar/Sink.cpp
@@ -202,7 +202,7 @@ static bool ProcessBlock(BasicBlock &BB, DominatorTree &DT, LoopInfo &LI,
if (!ProcessedBegin)
--I;
- if (isa<DbgInfoIntrinsic>(Inst))
+ if (Inst->isDebugOrPseudoInst())
continue;
if (SinkInstruction(Inst, Stores, DT, LI, AA)) {
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll
index e5bb7bc541c66..bbc03971760ab 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll
@@ -1,4 +1,4 @@
-; RUN: opt -passes=instcombine -S < %s | FileCheck %s
+; RUN: opt -passes=instcombine -available-load-scan-limit=2 -S < %s | FileCheck %s
%struct.nonbonded = type { [2 x %struct.CompAtom*], [2 x %struct.CompAtomExt*], [2 x %struct.CompAtom*], [2 x %class.Vector*], [2 x %class.Vector*], [2 x i32], %class.Vector, double*, double*, %class.ComputeNonbondedWorkArrays*, %class.Pairlists*, i32, i32, double, double, i32, i32, i32, i32 }
%struct.CompAtomExt = type { i32 }
@@ -13,11 +13,11 @@
%class.ResizeArrayRaw.3 = type <{ %class.Vector*, i8*, i32, i32, i32, float, i32, [4 x i8] }>
%class.Pairlists = type { i16*, i32, i32 }
+define dso_local void @merge(%struct.nonbonded* nocapture readonly %params) local_unnamed_addr align 2 {
;; Check the minPart4 and minPart assignments are merged.
+; CHECK-LABEL: @merge(
; CHECK-COUNT-1: getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16
; CHECK-NOT: getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16
-
-define dso_local void @_ZN20ComputeNonbondedUtil9calc_pairEP9nonbonded(%struct.nonbonded* nocapture readonly %params) local_unnamed_addr align 2 {
entry:
%savePairlists3 = getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 11
%0 = load i32, i32* %savePairlists3, align 8
@@ -58,7 +58,36 @@ if.else147: ; preds = %if.then138
ret void
}
-declare dso_local void @_ZN9Pairlists8addIndexEv() align 2
+define i32 @load(i32* nocapture %a, i32* nocapture %b) {
+;; Check the last load is deleted.
+; CHECK-LABEL: @load(
+; CHECK-NEXT: %1 = getelementptr inbounds i32, i32* %a, i64 1
+; CHECK-NEXT: %2 = load i32, i32* %1, align 8
+; CHECK-NEXT: %3 = getelementptr inbounds i32, i32* %b, i64 1
+; CHECK-NEXT: store i32 %2, i32* %3, align 8
+; CHECK-NEXT: call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1)
+; CHECK-NEXT: ret i32 %[[#]]
+ %1 = getelementptr inbounds i32, i32* %a, i32 1
+ %2 = load i32, i32* %1, align 8
+ %3 = getelementptr inbounds i32, i32* %b, i32 1
+ store i32 %2, i32* %3, align 8
+ %4 = getelementptr inbounds i32, i32* %b, i32 1
+ call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1)
+ %5 = load i32, i32* %4, align 8
+ ret i32 %5
+}
+
+define void @dse(i32* %p) {
+;; Check the first store is deleted.
+; CHECK-LABEL: @dse(
+; CHECK-NEXT: call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1)
+; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4
+; CHECK-NEXT: ret void
+ store i32 0, i32* %p
+ call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1)
+ store i32 0, i32* %p
+ ret void
+}
; Function Attrs: inaccessiblememonly nounwind willreturn
declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-memset.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-memset.ll
new file mode 100644
index 0000000000000..0b2b530bd5e2e
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-memset.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -memcpyopt -S | FileCheck %s
+
+%struct.MV = type { i16, i16 }
+
+define void @test(i32* nocapture %c) nounwind optsize {
+; All the stores in this example should be merged into a single memset.
+; CHECK-NOT: store i32 -1
+; CHECK: call void @llvm.memset.p0i8.i64
+ store i32 -1, i32* %c, align 4
+ %1 = getelementptr inbounds i32, i32* %c, i32 1
+ store i32 -1, i32* %1, align 4
+ %2 = getelementptr inbounds i32, i32* %c, i32 2
+ store i32 -1, i32* %2, align 4
+ call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1)
+ %3 = getelementptr inbounds i32, i32* %c, i32 3
+ store i32 -1, i32* %3, align 4
+ %4 = getelementptr inbounds i32, i32* %c, i32 4
+ store i32 -1, i32* %4, align 4
+ ret void
+}
+
+; Function Attrs: inaccessiblememonly nounwind willreturn
+declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0
+
+attributes #0 = { inaccessiblememonly nounwind willreturn }
More information about the llvm-commits
mailing list