[llvm] c75da23 - [CSSPGO] Deduplicating dangling pseudo probes.

Hongtao Yu via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 3 22:45:07 PST 2021


Author: Hongtao Yu
Date: 2021-03-03T22:44:42-08:00
New Revision: c75da238b419516534f372f87c9fd707650ebf3f

URL: https://github.com/llvm/llvm-project/commit/c75da238b419516534f372f87c9fd707650ebf3f
DIFF: https://github.com/llvm/llvm-project/commit/c75da238b419516534f372f87c9fd707650ebf3f.diff

LOG: [CSSPGO] Deduplicating dangling pseudo probes.

Identical dangling probes are redundant, since they all carry the same semantics: rely on the counts inference tool to get a reasonable count for the same original block. Therefore, there's no need to keep multiple copies of them. I've seen jump threading create tons of redundant dangling probes that slowed down the compiler dramatically. Other optimization passes can also produce redundant probes, though without an observed impact so far.

This change removes block-wise redundant dangling probes specifically introduced by jump threading. To support removing redundant dangling probes caused by all other passes, a final function-wise deduplication is also added.

An 18% size win of the .pseudo_probe section was seen for SPEC2017. No performance difference was observed.

Differential Revision: https://reviews.llvm.org/D97482

Added: 
    llvm/test/Transforms/SampleProfile/pseudo-probe-dedup.ll

Modified: 
    llvm/include/llvm/IR/PseudoProbe.h
    llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
    llvm/lib/CodeGen/PseudoProbeInserter.cpp
    llvm/lib/IR/PseudoProbe.cpp
    llvm/lib/Transforms/Scalar/JumpThreading.cpp
    llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
    llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/PseudoProbe.h b/llvm/include/llvm/IR/PseudoProbe.h
index 8a9731ff30d3e..58d83a6e2b54e 100644
--- a/llvm/include/llvm/IR/PseudoProbe.h
+++ b/llvm/include/llvm/IR/PseudoProbe.h
@@ -92,6 +92,8 @@ Optional<PseudoProbe> extractProbe(const Instruction &Inst);
 void setProbeDistributionFactor(Instruction &Inst, float Factor);
 
 bool moveAndDanglePseudoProbes(BasicBlock *From, Instruction *To);
+
+bool removeRedundantPseudoProbes(BasicBlock *Block);
 } // end namespace llvm
 
 #endif // LLVM_IR_PSEUDOPROBE_H

diff  --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 1dda739138268..fc44e2e26fc5b 100644
--- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -109,8 +109,9 @@ bool MergeBlockSuccessorsIntoGivenBlocks(
     DomTreeUpdater *DTU = nullptr, LoopInfo *LI = nullptr);
 
 /// Try to remove redundant dbg.value instructions from given basic block.
-/// Returns true if at least one instruction was removed.
-bool RemoveRedundantDbgInstrs(BasicBlock *BB);
+/// Returns true if at least one instruction was removed. Also removes
+/// redundant dangling pseudo probes when RemovePseudoOp is true.
+bool RemoveRedundantDbgInstrs(BasicBlock *BB, bool RemovePseudoOp = false);
 
 /// Replace all uses of an instruction (specified by BI) with a value, then
 /// remove and delete the original instruction.

diff  --git a/llvm/lib/CodeGen/PseudoProbeInserter.cpp b/llvm/lib/CodeGen/PseudoProbeInserter.cpp
index 35cf05316bb87..cda93931a5012 100644
--- a/llvm/lib/CodeGen/PseudoProbeInserter.cpp
+++ b/llvm/lib/CodeGen/PseudoProbeInserter.cpp
@@ -22,7 +22,7 @@
 #include "llvm/InitializePasses.h"
 #include "llvm/MC/MCPseudoProbe.h"
 #include "llvm/Target/TargetMachine.h"
-#include <unordered_map>
+#include <unordered_set>
 
 #define DEBUG_TYPE "pseudo-probe-inserter"
 
@@ -112,6 +112,41 @@ class PseudoProbeInserter : public MachineFunctionPass {
       }
     }
 
+    // Remove redundant dangling probes. Same dangling probes are redundant
+    // since they all have the same semantic that is to rely on the counts
+    // inference tool to get reasonable count for the same original block.
+    // Therefore, there's no need to keep multiple copies of them.
+    auto Hash = [](const MachineInstr *MI) {
+      return std::hash<uint64_t>()(MI->getOperand(0).getImm()) ^
+             std::hash<uint64_t>()(MI->getOperand(1).getImm());
+    };
+
+    auto IsEqual = [](const MachineInstr *Left, const MachineInstr *Right) {
+      return Left->getOperand(0).getImm() == Right->getOperand(0).getImm() &&
+             Left->getOperand(1).getImm() == Right->getOperand(1).getImm() &&
+             Left->getOperand(3).getImm() == Right->getOperand(3).getImm() &&
+             Left->getDebugLoc() == Right->getDebugLoc();
+    };
+
+    SmallVector<MachineInstr *, 4> ToBeRemoved;
+    std::unordered_set<MachineInstr *, decltype(Hash), decltype(IsEqual)>
+        DanglingProbes(0, Hash, IsEqual);
+
+    for (MachineBasicBlock &MBB : MF) {
+      for (MachineInstr &MI : MBB) {
+        if (MI.isPseudoProbe()) {
+          if ((uint32_t)MI.getPseudoProbeAttribute() &
+              (uint32_t)PseudoProbeAttributes::Dangling)
+            if (!DanglingProbes.insert(&MI).second)
+              ToBeRemoved.push_back(&MI);
+        }
+      }
+    }
+
+    for (auto *MI : ToBeRemoved)
+      MI->eraseFromParent();
+
+    Changed |= !ToBeRemoved.empty();
     return Changed;
   }
 

diff  --git a/llvm/lib/IR/PseudoProbe.cpp b/llvm/lib/IR/PseudoProbe.cpp
index 66ff00bba8dfd..5d1fef55f8c46 100644
--- a/llvm/lib/IR/PseudoProbe.cpp
+++ b/llvm/lib/IR/PseudoProbe.cpp
@@ -15,6 +15,7 @@
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instruction.h"
+#include <unordered_set>
 
 using namespace llvm;
 
@@ -130,4 +131,40 @@ bool moveAndDanglePseudoProbes(BasicBlock *From, Instruction *To) {
 
   return !ToBeMoved.empty();
 }
+
+/// Erase duplicate dangling pseudo probes from \p Block, keeping the first of
+/// each group that matches in GUID, index, attributes and debug location.
+/// Such copies all rely on the counts inference tool for the same original
+/// block, so one suffices. Returns true if at least one probe was erased.
+bool removeRedundantPseudoProbes(BasicBlock *Block) {
+
+  auto Hash = [](const PseudoProbeInst *I) { // GUID and index only.
+    return std::hash<uint64_t>()(I->getFuncGuid()->getZExtValue()) ^
+           std::hash<uint64_t>()(I->getIndex()->getZExtValue());
+  };
+
+  auto IsEqual = [](const PseudoProbeInst *Left, const PseudoProbeInst *Right) {
+    return Left->getFuncGuid() == Right->getFuncGuid() &&
+           Left->getIndex() == Right->getIndex() &&
+           Left->getAttributes() == Right->getAttributes() &&
+           Left->getDebugLoc() == Right->getDebugLoc();
+  };
+
+  SmallVector<PseudoProbeInst *, 4> ToBeRemoved;
+  std::unordered_set<PseudoProbeInst *, decltype(Hash), decltype(IsEqual)>
+      DanglingProbes(0, Hash, IsEqual);
+
+  for (auto &I : *Block) {
+    if (auto *II = dyn_cast<PseudoProbeInst>(&I)) {
+      if (II->getAttributes()->getZExtValue() &
+          (uint32_t)PseudoProbeAttributes::Dangling)
+        if (!DanglingProbes.insert(II).second) // Equal probe already seen.
+          ToBeRemoved.push_back(II);
+    }
+  }
+
+  for (auto *I : ToBeRemoved) // Erase only after the scan has finished.
+    I->eraseFromParent();
+  return !ToBeRemoved.empty();
+}
 } // namespace llvm

diff  --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 08e9d33b0e38e..2fb4a67ca1c98 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -433,8 +433,9 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
 
       // Jump threading may have introduced redundant debug values into BB
       // which should be removed.
+      // Remove redundant pseudo probes as well.
       if (Changed)
-        RemoveRedundantDbgInstrs(&BB);
+        RemoveRedundantDbgInstrs(&BB, true);
 
       // Stop processing BB if it's the entry or is now deleted. The following
       // routines attempt to eliminate BB and locating a suitable replacement
@@ -467,7 +468,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
             // detect and transform nested loops later.
             !LoopHeaders.count(&BB) && !LoopHeaders.count(Succ) &&
             TryToSimplifyUncondBranchFromEmptyBlock(&BB, DTU)) {
-          RemoveRedundantDbgInstrs(Succ);
+          RemoveRedundantDbgInstrs(Succ, true);
           // BB is valid for cleanup here because we passed in DTU. F remains
           // BB's parent until a DTU->getDomTree() event.
           LVI->eraseBlock(&BB);

diff  --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 31948dd17f7b8..9697e7d72c2c8 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -33,6 +33,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/PseudoProbe.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
@@ -432,7 +433,7 @@ static bool removeRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) {
   return !ToBeRemoved.empty();
 }
 
-bool llvm::RemoveRedundantDbgInstrs(BasicBlock *BB) {
+bool llvm::RemoveRedundantDbgInstrs(BasicBlock *BB, bool RemovePseudoOp) {
   bool MadeChanges = false;
   // By using the "backward scan" strategy before the "forward scan" strategy we
   // can remove both dbg.value (2) and (3) in a situation like this:
@@ -447,6 +448,8 @@ bool llvm::RemoveRedundantDbgInstrs(BasicBlock *BB) {
   // already is described as having the value V1 at (1).
   MadeChanges |= removeRedundantDbgInstrsUsingBackwardScan(BB);
   MadeChanges |= removeRedundantDbgInstrsUsingForwardScan(BB);
+  if (RemovePseudoOp)
+    MadeChanges |= removeRedundantPseudoProbes(BB);
 
   if (MadeChanges)
     LLVM_DEBUG(dbgs() << "Removed redundant dbg instrs from: "

diff  --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll
index 910e297b8bd3a..3844b00a5d8d0 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll
@@ -39,6 +39,34 @@ Merge:
 ; MIR-tail: PSEUDO_PROBE [[#GUID:]], 3, 0, 2
 ; MIR-tail: PSEUDO_PROBE [[#GUID:]], 4, 0, 0
 
+
+define void @foo2() {
+bb:
+  %tmp = call i32 @f1()
+  %tmp1 = icmp eq i32 %tmp, 1
+  br i1 %tmp1, label %bb5, label %bb8
+
+bb2:
+  %tmp4 = icmp ne i32 %tmp, 1
+  switch i1 %tmp4, label %bb2 [
+  i1 0, label %bb5
+  i1 1, label %bb8
+  ]
+
+bb5:
+;; Check the pseudo probe with id 3 only has one copy.
+; JT-COUNT-1: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 3, i32 2, i64 -1)
+; JT-NOT: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 3, i32 2, i64 -1)
+  %tmp6 = phi i1 [ %tmp1, %bb ], [ false, %bb2 ]
+  br i1 %tmp6, label %bb8, label %bb7
+
+bb7:
+  br label %bb8
+
+bb8:
+  ret void
+}
+
 define i32 @test(i32 %a, i32 %b, i32 %c) {
 ;; Check block bb1 and bb2 are gone, and their probes (probe 2 and 3) are dangling.
 ; SC-LABEL: @test(

diff  --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-dedup.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-dedup.ll
new file mode 100644
index 0000000000000..57a24de8abea7
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-dedup.ll
@@ -0,0 +1,41 @@
+; REQUIRES: x86_64-linux
+; RUN: llc -pseudo-probe-for-profiling %s -filetype=asm -o - | FileCheck %s
+
+declare i32 @f1()
+declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0
+
+define void @foo2() {
+bb:
+; CHECK: .pseudoprobe	2494702099028631698 1 0 0
+  call void @llvm.pseudoprobe(i64 2494702099028631698, i64 1, i32 0, i64 -1)
+  %tmp = call i32 @f1()
+  %tmp1 = icmp eq i32 %tmp, 1
+  br i1 %tmp1, label %bb5, label %bb8
+
+bb2:
+;; Check the pseudo probe with id 2 only has one copy.
+; CHECK-COUNT-1: .pseudoprobe	2494702099028631698 2 0 2
+; CHECK-NOT: .pseudoprobe	2494702099028631698 2 0 2
+  call void @llvm.pseudoprobe(i64 2494702099028631698, i64 2, i32 2, i64 -1)
+  %tmp4 = icmp ne i32 %tmp, 1
+  call void @llvm.pseudoprobe(i64 2494702099028631698, i64 2, i32 2, i64 -1)
+  switch i1 %tmp4, label %bb2 [
+  i1 0, label %bb5
+  i1 1, label %bb8
+  ]
+
+bb5:
+  %tmp6 = phi i1 [ %tmp1, %bb ], [ false, %bb2 ]
+  call void @llvm.pseudoprobe(i64 2494702099028631698, i64 2, i32 2, i64 -1)
+  br i1 %tmp6, label %bb8, label %bb7
+
+bb7:
+  br label %bb8
+
+bb8:
+  ret void
+}
+
+!llvm.pseudo_probe_desc = !{!0}
+
+!0 = !{i64 2494702099028631698, i64 281612674956943, !"foo2", null}
\ No newline at end of file


        


More information about the llvm-commits mailing list