[llvm] 30bb5be - [CSSPGO] Unblock optimizations with pseudo probe instrumentation part 2.

Hongtao Yu via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 26 16:52:40 PDT 2021


Author: Hongtao Yu
Date: 2021-04-26T16:52:33-07:00
New Revision: 30bb5be38908b0006ed94124515e43774ee37915

URL: https://github.com/llvm/llvm-project/commit/30bb5be38908b0006ed94124515e43774ee37915
DIFF: https://github.com/llvm/llvm-project/commit/30bb5be38908b0006ed94124515e43774ee37915.diff

LOG: [CSSPGO] Unblock optimizations with pseudo probe instrumentation part 2.

As a follow-up to D95982, this patch continues unblocking optimizations that are blocked by pseudo probe instrumentation.

The optimizations unblocked are:
		- In-block load propagation.
		- In-block dead store elimination
		- Memory copy optimization that turns stores to consecutive memories into a memset.

These optimizations are local to a block, so they shouldn't affect the profile quality.

Reviewed By: wmi

Differential Revision: https://reviews.llvm.org/D100075

Added: 
    llvm/test/Transforms/SampleProfile/pseudo-probe-memset.ll

Modified: 
    llvm/lib/Analysis/Loads.cpp
    llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
    llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
    llvm/lib/Transforms/Scalar/Sink.cpp
    llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index da4ee7d5228e1..1c55f485aa763 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -532,7 +532,7 @@ Value *llvm::findAvailablePtrLoadStore(
     // We must ignore debug info directives when counting (otherwise they
     // would affect codegen).
     Instruction *Inst = &*--ScanFrom;
-    if (isa<DbgInfoIntrinsic>(Inst))
+    if (Inst->isDebugOrPseudoInst())
       continue;
 
     // Restore ScanFrom to expected value in case next test succeeds
@@ -620,7 +620,7 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, AAResults &AA,
   SmallVector<Instruction *> MustNotAliasInsts;
   for (Instruction &Inst : make_range(++Load->getReverseIterator(),
                                       ScanBB->rend())) {
-    if (isa<DbgInfoIntrinsic>(&Inst))
+    if (Inst.isDebugOrPseudoInst())
       continue;
 
     if (MaxInstsToScan-- == 0)

diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 77d91055688b5..a1cf8e40e7a48 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1396,7 +1396,7 @@ Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) {
     --BBI;
     // Don't count debug info directives, lest they affect codegen,
     // and we skip pointer-to-pointer bitcasts, which are NOPs.
-    if (isa<DbgInfoIntrinsic>(BBI) ||
+    if (BBI->isDebugOrPseudoInst() ||
         (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
       ScanInsts++;
       continue;

diff  --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 83d475d90b85c..29f43f1ac3010 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -399,6 +399,13 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
       }
     }
 
+    // Calls that only access inaccessible memory do not block merging
+    // accessible stores.
+    if (auto *CB = dyn_cast<CallBase>(BI)) {
+      if (CB->onlyAccessesInaccessibleMemory())
+        continue;
+    }
+
     if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
       // If the instruction is readnone, ignore it, otherwise bail out.  We
       // don't even allow readonly here because we don't want something like:

diff  --git a/llvm/lib/Transforms/Scalar/Sink.cpp b/llvm/lib/Transforms/Scalar/Sink.cpp
index 89cfbe384be44..8600aacdb0561 100644
--- a/llvm/lib/Transforms/Scalar/Sink.cpp
+++ b/llvm/lib/Transforms/Scalar/Sink.cpp
@@ -202,7 +202,7 @@ static bool ProcessBlock(BasicBlock &BB, DominatorTree &DT, LoopInfo &LI,
     if (!ProcessedBegin)
       --I;
 
-    if (isa<DbgInfoIntrinsic>(Inst))
+    if (Inst->isDebugOrPseudoInst())
       continue;
 
     if (SinkInstruction(Inst, Stores, DT, LI, AA)) {

diff  --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll
index e5bb7bc541c66..bbc03971760ab 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll
@@ -1,4 +1,4 @@
-; RUN: opt -passes=instcombine -S < %s | FileCheck %s
+; RUN: opt -passes=instcombine -available-load-scan-limit=2 -S < %s | FileCheck %s
 
 %struct.nonbonded = type { [2 x %struct.CompAtom*], [2 x %struct.CompAtomExt*], [2 x %struct.CompAtom*], [2 x %class.Vector*], [2 x %class.Vector*], [2 x i32], %class.Vector, double*, double*, %class.ComputeNonbondedWorkArrays*, %class.Pairlists*, i32, i32, double, double, i32, i32, i32, i32 }
 %struct.CompAtomExt = type { i32 }
@@ -13,11 +13,11 @@
 %class.ResizeArrayRaw.3 = type <{ %class.Vector*, i8*, i32, i32, i32, float, i32, [4 x i8] }>
 %class.Pairlists = type { i16*, i32, i32 }
 
+define dso_local void @merge(%struct.nonbonded* nocapture readonly %params) local_unnamed_addr align 2 {
 ;; Check the minPart4 and minPart assignments are merged.
+; CHECK-LABEL: @merge(
 ; CHECK-COUNT-1: getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16
 ; CHECK-NOT: getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16
-
-define dso_local void @_ZN20ComputeNonbondedUtil9calc_pairEP9nonbonded(%struct.nonbonded* nocapture readonly %params) local_unnamed_addr align 2 {
 entry:
   %savePairlists3 = getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 11
   %0 = load i32, i32* %savePairlists3, align 8
@@ -58,7 +58,36 @@ if.else147:                                       ; preds = %if.then138
   ret void
 }
 
-declare dso_local void @_ZN9Pairlists8addIndexEv() align 2
+define i32 @load(i32* nocapture %a, i32* nocapture %b) {
+;; Check the last load is deleted.
+; CHECK-LABEL: @load(
+; CHECK-NEXT:  %1 = getelementptr inbounds i32, i32* %a, i64 1
+; CHECK-NEXT:  %2 = load i32, i32* %1, align 8
+; CHECK-NEXT:  %3 = getelementptr inbounds i32, i32* %b, i64 1
+; CHECK-NEXT:       store i32 %2, i32* %3, align 8
+; CHECK-NEXT:    call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1)
+; CHECK-NEXT:    ret i32 %[[#]]
+  %1 = getelementptr inbounds i32, i32* %a, i32 1
+  %2 = load i32, i32* %1, align 8
+  %3 = getelementptr inbounds i32, i32* %b, i32 1
+       store i32 %2, i32* %3, align 8
+  %4 = getelementptr inbounds i32, i32* %b, i32 1
+  call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1)
+  %5 = load i32, i32* %4, align 8
+  ret i32 %5
+}
+
+define void @dse(i32* %p) {
+;; Check the first store is deleted.
+; CHECK-LABEL: @dse(
+; CHECK-NEXT:    call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1)
+; CHECK-NEXT:    store i32 0, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    ret void
+  store i32 0, i32* %p
+  call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1)
+  store i32 0, i32* %p
+  ret void
+}
 
 ; Function Attrs: inaccessiblememonly nounwind willreturn
 declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0

diff  --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-memset.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-memset.ll
new file mode 100644
index 0000000000000..0b2b530bd5e2e
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-memset.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -memcpyopt -S | FileCheck %s
+
+%struct.MV = type { i16, i16 }
+
+define void @test(i32* nocapture %c) nounwind optsize {
+; All the stores in this example should be merged into a single memset.
+; CHECK-NOT:  store i32 -1
+; CHECK: call void @llvm.memset.p0i8.i64
+  store i32 -1, i32* %c, align 4
+  %1 = getelementptr inbounds i32, i32* %c, i32 1
+  store i32 -1, i32* %1, align 4
+  %2 = getelementptr inbounds i32, i32* %c, i32 2
+  store i32 -1, i32* %2, align 4
+  call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1)
+  %3 = getelementptr inbounds i32, i32* %c, i32 3
+  store i32 -1, i32* %3, align 4
+  %4 = getelementptr inbounds i32, i32* %c, i32 4
+  store i32 -1, i32* %4, align 4
+  ret void
+}
+
+; Function Attrs: inaccessiblememonly nounwind willreturn
+declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0
+
+attributes #0 = { inaccessiblememonly nounwind willreturn }


        


More information about the llvm-commits mailing list