[llvm] [SimplifyCFG] Fix hoisting problem in SimplifyCFG (PR #78615)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 18 11:19:45 PST 2024


https://github.com/RouzbehPaktinat created https://github.com/llvm/llvm-project/pull/78615

This patch fixes the hoisting problem discussed in this issue: https://github.com/llvm/llvm-project/issues/68395

Currently, SimplifyCFG tries to hoist identical instructions from successors of a basic block, only if those instructions are located at the same level in the successor blocks which causes missing many hoisting opportunities. This patch solves the problem by creating hashmaps from instructions of all successors except the first one. It then iterates over the instructions of the first successor and looks up hashmaps to find identical instructions in order to hoist them.

Measurements on llvm-test-suit/MultiSource showed that it improves compile time by ~1%.

>From 49f31ccc30b2febd7239966bc992484200830e3a Mon Sep 17 00:00:00 2001
From: RouzbehPaktinat <rouzbeh.paktinat1 at huawei.com>
Date: Thu, 18 Jan 2024 12:56:35 -0500
Subject: [PATCH] [SimplifyCFG] Fix hoisting problem in SimplifyCFG

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp     | 180 ++++++++++++------
 .../SimplifyCFG/hoist-common-code.ll          |  52 +++++
 2 files changed, 172 insertions(+), 60 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index f3994b6cc39fef..0611b581c145e9 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1526,6 +1526,17 @@ static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2,
   return true;
 }
 
+// Hash instructions based on following factors:
+// 1- Instruction Opcode
+// 2- Instruction type
+// 3- Instruction operands
+llvm::hash_code getHash(Instruction *Instr) {
+  std::vector<Value *> operands(Instr->op_begin(), Instr->op_end());
+  return llvm::hash_combine(
+      Instr->getOpcode(), Instr->getType(),
+      hash_combine_range(operands.begin(), operands.end()));
+}
+
 /// Hoist any common code in the successor blocks up into the block. This
 /// function guarantees that BB dominates all successors. If EqTermsOnly is
 /// given, only perform hoisting in case both blocks only contain a terminator.
@@ -1533,12 +1544,11 @@ static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2,
 /// added.
 bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
                                                    bool EqTermsOnly) {
-  // This does very trivial matching, with limited scanning, to find identical
-  // instructions in the two blocks. In particular, we don't want to get into
-  // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
-  // such, we currently just scan for obviously identical instructions in an
-  // identical order, possibly separated by the same number of non-identical
-  // instructions.
+  // We first sort successors based on the number of instructions each block
+  // holds. Then for each successor we make a hashmap from its instructions,
+  // except for the first successor. After that, we iterate over the
+  // instructions of the first successor. If we find identical instructions from
+  // every other successor, we hoist all of them into the predeccessor.
   unsigned int SuccSize = succ_size(BB);
   if (SuccSize < 2)
     return false;
@@ -1552,10 +1562,21 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
 
   auto *TI = BB->getTerminator();
 
+  SmallVector<BasicBlock *> SuccessorBlocks;
+  for (auto *Succ : successors(BB))
+    SuccessorBlocks.push_back(Succ);
+
+  // Sort successor blocks based on the number of instructions.
+  // This is because we always want to iterate over instructions
+  // of the smallest block.
+  llvm::stable_sort(SuccessorBlocks, [](BasicBlock *BB1, BasicBlock *BB2) {
+    return BB1->sizeWithoutDebug() < BB2->sizeWithoutDebug();
+  });
+
   // The second of pair is a SkipFlags bitmask.
   using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
   SmallVector<SuccIterPair, 8> SuccIterPairs;
-  for (auto *Succ : successors(BB)) {
+  for (auto *Succ : SuccessorBlocks) {
     BasicBlock::iterator SuccItr = Succ->begin();
     if (isa<PHINode>(*SuccItr))
       return false;
@@ -1589,80 +1610,121 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
   }
 
   bool Changed = false;
+  auto *SuccIterPairBegin = SuccIterPairs.begin();
+  SuccIterPairBegin++;
+  auto OtherSuccIterPairRange =
+      iterator_range(SuccIterPairBegin, SuccIterPairs.end());
+  auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
+  using InstrFlagPair = std::pair<Instruction *, unsigned>;
+  SmallVector<DenseMap<llvm::hash_code, InstrFlagPair>, 2> OtherSuccessorsHash;
+
+  for (auto BBItrPair : OtherSuccIterRange) {
+    // Fill the hashmap for every other successor
+    DenseMap<llvm::hash_code, InstrFlagPair> hashMap;
+    unsigned skipFlag = 0;
+    Instruction *I = nullptr;
+    do {
+      I = &*BBItrPair;
+      skipFlag |= skippedInstrFlags(I);
+      hashMap[getHash(I)] = InstrFlagPair(I, skipFlag);
+      BBItrPair++;
+    } while (!I->isTerminator());
+    OtherSuccessorsHash.push_back(hashMap);
+  }
 
+  // Keep track of instructions skipped in the first successor
+  unsigned SkipFlagsBB1 = 0;
+  bool SameLevelHoist = true;
   for (;;) {
     auto *SuccIterPairBegin = SuccIterPairs.begin();
     auto &BB1ItrPair = *SuccIterPairBegin++;
     auto OtherSuccIterPairRange =
         iterator_range(SuccIterPairBegin, SuccIterPairs.end());
-    auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
-
     Instruction *I1 = &*BB1ItrPair.first;
     auto *BB1 = I1->getParent();
-
-    // Skip debug info if it is not identical.
-    bool AllDbgInstsAreIdentical = all_of(OtherSuccIterRange, [I1](auto &Iter) {
-      Instruction *I2 = &*Iter;
-      return I1->isIdenticalToWhenDefined(I2);
-    });
-    if (!AllDbgInstsAreIdentical) {
-      while (isa<DbgInfoIntrinsic>(I1))
-        I1 = &*++BB1ItrPair.first;
-      for (auto &SuccIter : OtherSuccIterRange) {
-        Instruction *I2 = &*SuccIter;
-        while (isa<DbgInfoIntrinsic>(I2))
-          I2 = &*++SuccIter;
+    bool HasIdenticalInst = true;
+
+    // Check if there are identical instructions in all other successors
+    for (auto &map : OtherSuccessorsHash) {
+      Instruction *I2 = map[getHash(I1)].first;
+      // We might face with same hash values for different instructions.
+      // If that happens, ignore the instruction.
+      if (!I2 || !I1->isIdenticalTo(I2)) {
+        HasIdenticalInst = false;
+        break;
       }
     }
 
-    bool AllInstsAreIdentical = true;
-    bool HasTerminator = I1->isTerminator();
-    for (auto &SuccIter : OtherSuccIterRange) {
-      Instruction *I2 = &*SuccIter;
-      HasTerminator |= I2->isTerminator();
-      if (AllInstsAreIdentical && !I1->isIdenticalToWhenDefined(I2))
-        AllInstsAreIdentical = false;
+    if (!HasIdenticalInst) {
+      if (NumSkipped >= HoistCommonSkipLimit)
+        return Changed;
+      SkipFlagsBB1 |= skippedInstrFlags(I1);
+      if (SameLevelHoist) {
+        for (auto &SuccIterPair : OtherSuccIterPairRange) {
+          Instruction *I = &*SuccIterPair.first++;
+          SuccIterPair.second |= skippedInstrFlags(I);
+        }
+      }
+      NumSkipped++;
+      if (I1->isTerminator())
+        return Changed;
+      ++BB1ItrPair.first;
+      continue;
     }
 
     // If we are hoisting the terminator instruction, don't move one (making a
     // broken BB), instead clone it, and remove BI.
-    if (HasTerminator) {
+    if (I1->isTerminator()) {
       // Even if BB, which contains only one unreachable instruction, is ignored
       // at the beginning of the loop, we can hoist the terminator instruction.
       // If any instructions remain in the block, we cannot hoist terminators.
-      if (NumSkipped || !AllInstsAreIdentical)
+      if (NumSkipped)
         return Changed;
       SmallVector<Instruction *, 8> Insts;
-      for (auto &SuccIter : OtherSuccIterRange)
-        Insts.push_back(&*SuccIter);
+      for (auto &map : OtherSuccessorsHash) {
+        Instruction *I2 = map[getHash(I1)].first;
+        // BB holding I2 should only contain the branch instruction
+        auto itr = I2->getParent()->instructionsWithoutDebug();
+        if (&*itr.begin() != I2)
+          return Changed;
+        Insts.push_back(I2);
+      }
       return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, Insts) || Changed;
     }
 
-    if (AllInstsAreIdentical) {
-      unsigned SkipFlagsBB1 = BB1ItrPair.second;
-      AllInstsAreIdentical =
-          isSafeToHoistInstr(I1, SkipFlagsBB1) &&
-          all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
-            Instruction *I2 = &*Pair.first;
-            unsigned SkipFlagsBB2 = Pair.second;
-            // Even if the instructions are identical, it may not
-            // be safe to hoist them if we have skipped over
-            // instructions with side effects or their operands
-            // weren't hoisted.
-            return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
-                   shouldHoistCommonInstructions(I1, I2, TTI);
-          });
+    bool SafeToHoist = isSafeToHoistInstr(I1, SkipFlagsBB1);
+    unsigned index = 0;
+    for (auto &SuccIterPair : OtherSuccIterPairRange) {
+      Instruction *I2 = OtherSuccessorsHash[index][getHash(I1)].first;
+      // If instructions of all successors are at the same level, use the
+      // skipFlag of its BB, i.e., SameLevelHoist. Otherwise, use the skipFlag
+      // that was calculated initially for this instruction in the hashmap
+      if (SameLevelHoist && I2 == (&*(SuccIterPair.first))) {
+        SafeToHoist = SafeToHoist &&
+                      isSafeToHoistInstr(I2, SuccIterPair.second) &&
+                      shouldHoistCommonInstructions(I1, I2, TTI);
+      } else {
+        unsigned skipFlag = OtherSuccessorsHash[index][getHash(I1)].second;
+        SafeToHoist = SafeToHoist && isSafeToHoistInstr(I2, skipFlag) &&
+                      shouldHoistCommonInstructions(I1, I2, TTI);
+        SameLevelHoist = false;
+      }
+      index++;
     }
 
-    if (AllInstsAreIdentical) {
+    if (SafeToHoist) {
       BB1ItrPair.first++;
+      if (SameLevelHoist) {
+        for (auto &SuccIterPair : OtherSuccIterPairRange)
+          SuccIterPair.first++;
+      }
       if (isa<DbgInfoIntrinsic>(I1)) {
         // The debug location is an integral part of a debug info intrinsic
         // and can't be separated from it or replaced.  Instead of attempting
         // to merge locations, simply hoist both copies of the intrinsic.
         I1->moveBeforePreserving(TI);
-        for (auto &SuccIter : OtherSuccIterRange) {
-          auto *I2 = &*SuccIter++;
+        for (auto &map : OtherSuccessorsHash) {
+          Instruction *I2 = map[getHash(I1)].first;
           assert(isa<DbgInfoIntrinsic>(I2));
           I2->moveBeforePreserving(TI);
         }
@@ -1672,8 +1734,8 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
         // we remove the now redundant second instruction.
         I1->moveBeforePreserving(TI);
         BB->splice(TI->getIterator(), BB1, I1->getIterator());
-        for (auto &SuccIter : OtherSuccIterRange) {
-          Instruction *I2 = &*SuccIter++;
+        for (auto &map : OtherSuccessorsHash) {
+          Instruction *I2 = map[getHash(I1)].first;
           assert(I2 != I1);
           if (!I2->use_empty())
             I2->replaceAllUsesWith(I1);
@@ -1695,9 +1757,12 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
       // We are about to skip over a pair of non-identical instructions. Record
       // if any have characteristics that would prevent reordering instructions
       // across them.
-      for (auto &SuccIterPair : SuccIterPairs) {
-        Instruction *I = &*SuccIterPair.first++;
-        SuccIterPair.second |= skippedInstrFlags(I);
+      SkipFlagsBB1 |= skippedInstrFlags(I1);
+      if (SameLevelHoist) {
+        for (auto &SuccIterPair : OtherSuccIterPairRange) { // update flags
+          Instruction *I = &*SuccIterPair.first;
+          SuccIterPair.second |= skippedInstrFlags(I);
+        }
       }
       ++NumSkipped;
     }
@@ -1741,7 +1806,6 @@ bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
         Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
         if (BB1V == BB2V)
           continue;
-
         // In the case of an if statement, check for
         // passingValueIsAlwaysUndefined here because we would rather eliminate
         // undefined control flow then converting it to a select.
@@ -1810,20 +1874,16 @@ bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
       }
     }
   }
-
   SmallVector<DominatorTree::UpdateType, 4> Updates;
-
   // Update any PHI nodes in our new successors.
   for (BasicBlock *Succ : successors(BB1)) {
     AddPredecessorToBlock(Succ, TIParent, BB1);
     if (DTU)
       Updates.push_back({DominatorTree::Insert, TIParent, Succ});
   }
-
   if (DTU)
     for (BasicBlock *Succ : successors(TI))
       Updates.push_back({DominatorTree::Delete, TIParent, Succ});
-
   EraseTerminatorAndDCECond(TI);
   if (DTU)
     DTU->applyUpdates(Updates);
diff --git a/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll b/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll
index bfe31d8345d506..285062455e4f5f 100644
--- a/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll
+++ b/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll
@@ -24,6 +24,58 @@ F:              ; preds = %0
   ret void
 }
 
+define void @test_unordered(ptr noalias %b, ptr noalias %c, ptr noalias  %Q, ptr noalias  %R, i32 %i ) {
+; CHECK-LABEL: @test_unordered(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ldR1:%.*]] = load i32, ptr [[R:%.*]], align 8
+; CHECK-NEXT:    switch i32 %i, label %bb0 [
+; CHECK-NEXT:      i32 2, label %bb1
+; CHECK-NEXT:      i32 3, label %bb2
+; CHECK-NEXT:    ]
+; CHECK:       common.ret:         
+; CHECK-NEXT:    ret void
+; CHECK:       bb0:
+; CHECK-NEXT:    [[ldQ:%.*]] = load i32, ptr [[Q:%.*]], align 8
+; CHECK-NEXT:    [[mul:%.*]] = mul i32 [[ldQ:%.*]], 2
+; CHECK-NEXT:    [[add:%.*]] = add i32 [[ldR1:%.*]], [[mul:%.*]]
+; CHECK-NEXT:    store i32 [[add:%.*]], ptr [[c:%.*]], align 8
+; CHECK-NEXT:    br label [[COMMON_RET:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    store i32 [[ldR1:%.*]], ptr [[c:%.*]], align 4
+; CHECK-NEXT:    br label [[COMMON_RET:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[ldQ2:%.*]] = load i32, ptr [[Q:%.*]], align 8
+; CHECK-NEXT:    [[sub:%.*]] = sub i32 [[ldR1:%.*]], [[ldQ2:%.*]]
+; CHECK-NEXT:    store i32 [[sub:%.*]], ptr [[c:%.*]], align 8
+; CHECK-NEXT:    br label [[COMMON_RET:%.*]]
+
+entry:
+  switch i32 %i, label %bb0 [
+    i32 2, label %bb1
+    i32 3, label %bb2
+  ]
+                                     
+bb0:                                          ; preds = %entry
+  %ldQ1 = load i32, ptr %Q, align 8
+  %mul = mul i32 %ldQ1, 2
+  %ldR1 = load i32, ptr %R, align 8
+  %add = add i32 %ldR1, %mul
+  store i32 %add, ptr %c, align 8
+  ret void
+
+bb1:                                          ; preds = entry
+  %ldR2 = load i32, ptr %R, align 8
+  store i32 %ldR2, ptr %c
+  ret void
+
+bb2:                                          ; preds = entry
+  %ldQ2 = load i32, ptr %Q, align 8
+  %ldR3 = load i32, ptr %R, align 8
+  %sub = sub i32 %ldR3, %ldQ2
+  store i32 %sub, ptr %c, align 8
+  ret void
+}
+
 define void @test_switch(i64 %i, ptr %Q) {
 ; CHECK-LABEL: @test_switch(
 ; CHECK-NEXT:  common.ret:



More information about the llvm-commits mailing list