[llvm] 2f16173 - [llvm-reduce] optimize extractFromModule functions

Arthur Eubanks via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 29 10:08:03 PDT 2021


Author: Dwight Guth
Date: 2021-10-29T10:06:26-07:00
New Revision: 2f1617362751f2bff41014bc7364b24ec0ff3f47

URL: https://github.com/llvm/llvm-project/commit/2f1617362751f2bff41014bc7364b24ec0ff3f47
DIFF: https://github.com/llvm/llvm-project/commit/2f1617362751f2bff41014bc7364b24ec0ff3f47.diff

LOG: [llvm-reduce] optimize extractFromModule functions

The extractBasicBlocksFromModule, extractInstrFromModule, and other
similar functions previously performed very poorly when the number of
such elements in the program to reduce was very high. Previously, we
were creating the set which caches elements to keep by looping through
all elements in the module and adding them to the set. However, since
std::set is an ordered set, this introduces a massive amount of
rebalancing if the order of elements in the program and the order of
their pointers in memory are not the same.

The solution is straightforward: first put all the elements to be kept
in a vector, then use the constructor for std::set which takes a pair of
iterators over a collection. This constructor is optimized to avoid
doing unnecessary work when initializing large sets.

Also in this change, we pass BBsToKeep set to functions
replaceBranchTerminator and removeUninterestingBBsFromSwitch as a const
reference rather than passing it by value. This ought to prevent the
need to copy the collection each time these functions are called, which
is expensive if the collection is large.

Reviewed By: aeubanks

Differential Revision: https://reviews.llvm.org/D112757

Added: 
    

Modified: 
    llvm/tools/llvm-reduce/deltas/ReduceArguments.cpp
    llvm/tools/llvm-reduce/deltas/ReduceBasicBlocks.cpp
    llvm/tools/llvm-reduce/deltas/ReduceGlobalVars.cpp
    llvm/tools/llvm-reduce/deltas/ReduceInstructions.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/tools/llvm-reduce/deltas/ReduceArguments.cpp b/llvm/tools/llvm-reduce/deltas/ReduceArguments.cpp
index d3fcf53f46f99..eb582090bda58 100644
--- a/llvm/tools/llvm-reduce/deltas/ReduceArguments.cpp
+++ b/llvm/tools/llvm-reduce/deltas/ReduceArguments.cpp
@@ -54,7 +54,7 @@ static bool shouldRemoveArguments(const Function &F) {
 /// Removes out-of-chunk arguments from functions, and modifies their calls
 /// accordingly. It also removes allocations of out-of-chunk arguments.
 static void extractArgumentsFromModule(Oracle &O, Module &Program) {
-  std::set<Argument *> ArgsToKeep;
+  std::vector<Argument *> InitArgsToKeep;
   std::vector<Function *> Funcs;
   // Get inside-chunk arguments, as well as their parent function
   for (auto &F : Program)
@@ -62,9 +62,14 @@ static void extractArgumentsFromModule(Oracle &O, Module &Program) {
       Funcs.push_back(&F);
       for (auto &A : F.args())
         if (O.shouldKeep())
-          ArgsToKeep.insert(&A);
+          InitArgsToKeep.push_back(&A);
     }
 
+  // We create a vector first, then convert it to a set, so that we don't have
+  // to pay the cost of rebalancing the set frequently if the order we insert
+  // the elements doesn't match the order they should appear inside the set.
+  std::set<Argument *> ArgsToKeep(InitArgsToKeep.begin(), InitArgsToKeep.end());
+
   for (auto *F : Funcs) {
     ValueToValueMapTy VMap;
     std::vector<WeakVH> InstToDelete;

diff  --git a/llvm/tools/llvm-reduce/deltas/ReduceBasicBlocks.cpp b/llvm/tools/llvm-reduce/deltas/ReduceBasicBlocks.cpp
index 825993de53c76..f8a95bea62be0 100644
--- a/llvm/tools/llvm-reduce/deltas/ReduceBasicBlocks.cpp
+++ b/llvm/tools/llvm-reduce/deltas/ReduceBasicBlocks.cpp
@@ -25,7 +25,7 @@ using namespace llvm;
 
 /// Replaces BB Terminator with one that only contains Chunk BBs
 static void replaceBranchTerminator(BasicBlock &BB,
-                                    std::set<BasicBlock *> BBsToKeep) {
+                                    const std::set<BasicBlock *> &BBsToKeep) {
   auto *Term = BB.getTerminator();
   std::vector<BasicBlock *> ChunkSucessors;
   for (auto *Succ : successors(&BB))
@@ -66,8 +66,9 @@ static void replaceBranchTerminator(BasicBlock &BB,
 /// Removes uninteresting BBs from switch, if the default case ends up being
 /// uninteresting, the switch is replaced with a void return (since it has to be
 /// replace with something)
-static void removeUninterestingBBsFromSwitch(SwitchInst &SwInst,
-                                             std::set<BasicBlock *> BBsToKeep) {
+static void
+removeUninterestingBBsFromSwitch(SwitchInst &SwInst,
+                                 const std::set<BasicBlock *> &BBsToKeep) {
   if (!BBsToKeep.count(SwInst.getDefaultDest())) {
     auto *FnRetTy = SwInst.getParent()->getParent()->getReturnType();
     ReturnInst::Create(SwInst.getContext(),
@@ -88,12 +89,17 @@ static void removeUninterestingBBsFromSwitch(SwitchInst &SwInst,
 /// Removes out-of-chunk arguments from functions, and modifies their calls
 /// accordingly. It also removes allocations of out-of-chunk arguments.
 static void extractBasicBlocksFromModule(Oracle &O, Module &Program) {
-  std::set<BasicBlock *> BBsToKeep;
+  std::vector<BasicBlock *> InitBBsToKeep;
 
   for (auto &F : Program)
     for (auto &BB : F)
       if (O.shouldKeep())
-        BBsToKeep.insert(&BB);
+        InitBBsToKeep.push_back(&BB);
+
+  // We create a vector first, then convert it to a set, so that we don't have
+  // to pay the cost of rebalancing the set frequently if the order we insert
+  // the elements doesn't match the order they should appear inside the set.
+  std::set<BasicBlock *> BBsToKeep(InitBBsToKeep.begin(), InitBBsToKeep.end());
 
   std::vector<BasicBlock *> BBsToDelete;
   for (auto &F : Program)

diff  --git a/llvm/tools/llvm-reduce/deltas/ReduceGlobalVars.cpp b/llvm/tools/llvm-reduce/deltas/ReduceGlobalVars.cpp
index af685d003f942..0cb481616841b 100644
--- a/llvm/tools/llvm-reduce/deltas/ReduceGlobalVars.cpp
+++ b/llvm/tools/llvm-reduce/deltas/ReduceGlobalVars.cpp
@@ -20,10 +20,16 @@ using namespace llvm;
 /// Removes all the GVs that aren't inside the desired Chunks.
 static void extractGVsFromModule(Oracle &O, Module &Program) {
   // Get GVs inside desired chunks
-  std::set<GlobalVariable *> GVsToKeep;
+  std::vector<GlobalVariable *> InitGVsToKeep;
   for (auto &GV : Program.globals())
     if (O.shouldKeep())
-      GVsToKeep.insert(&GV);
+      InitGVsToKeep.push_back(&GV);
+
+  // We create a vector first, then convert it to a set, so that we don't have
+  // to pay the cost of rebalancing the set frequently if the order we insert
+  // the elements doesn't match the order they should appear inside the set.
+  std::set<GlobalVariable *> GVsToKeep(InitGVsToKeep.begin(),
+                                       InitGVsToKeep.end());
 
   // Delete out-of-chunk GVs and their uses
   std::vector<GlobalVariable *> ToRemove;

diff  --git a/llvm/tools/llvm-reduce/deltas/ReduceInstructions.cpp b/llvm/tools/llvm-reduce/deltas/ReduceInstructions.cpp
index 01df5b6ba4d6d..df7b54ccc0a9d 100644
--- a/llvm/tools/llvm-reduce/deltas/ReduceInstructions.cpp
+++ b/llvm/tools/llvm-reduce/deltas/ReduceInstructions.cpp
@@ -18,18 +18,24 @@ using namespace llvm;
 /// Removes out-of-chunk arguments from functions, and modifies their calls
 /// accordingly. It also removes allocations of out-of-chunk arguments.
 static void extractInstrFromModule(Oracle &O, Module &Program) {
-  std::set<Instruction *> InstToKeep;
+  std::vector<Instruction *> InitInstToKeep;
 
   for (auto &F : Program)
     for (auto &BB : F) {
       // Removing the terminator would make the block invalid. Only iterate over
       // instructions before the terminator.
-      InstToKeep.insert(BB.getTerminator());
+      InitInstToKeep.push_back(BB.getTerminator());
       for (auto &Inst : make_range(BB.begin(), std::prev(BB.end())))
         if (O.shouldKeep())
-          InstToKeep.insert(&Inst);
+          InitInstToKeep.push_back(&Inst);
     }
 
+  // We create a vector first, then convert it to a set, so that we don't have
+  // to pay the cost of rebalancing the set frequently if the order we insert
+  // the elements doesn't match the order they should appear inside the set.
+  std::set<Instruction *> InstToKeep(InitInstToKeep.begin(),
+                                     InitInstToKeep.end());
+
   std::vector<Instruction *> InstToDelete;
   for (auto &F : Program)
     for (auto &BB : F)


        


More information about the llvm-commits mailing list