[llvm] [SimplifyCFG] Fix hoisting problem in SimplifyCFG (PR #78615)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 24 12:52:34 PST 2024


https://github.com/RouzbehPaktinat updated https://github.com/llvm/llvm-project/pull/78615

>From 49f31ccc30b2febd7239966bc992484200830e3a Mon Sep 17 00:00:00 2001
From: RouzbehPaktinat <rouzbeh.paktinat1 at huawei.com>
Date: Thu, 18 Jan 2024 12:56:35 -0500
Subject: [PATCH 1/2] [SimplifyCFG] Fix hoisting problem in SimplifyCFG

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp     | 180 ++++++++++++------
 .../SimplifyCFG/hoist-common-code.ll          |  52 +++++
 2 files changed, 172 insertions(+), 60 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index f3994b6cc39fef..0611b581c145e9 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1526,6 +1526,17 @@ static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2,
   return true;
 }
 
+/// Hash \p Instr from its opcode, its result type and its operand values.
+/// The operands are hashed in place, with no temporary copy. Instructions
+/// that agree on all three properties get equal hashes, but equal hashes
+/// do not prove that two instructions are identical, so callers must still
+/// compare the instructions themselves after a lookup.
+static llvm::hash_code getHash(const Instruction *Instr) {
+  return llvm::hash_combine(
+      Instr->getOpcode(), Instr->getType(),
+      hash_combine_range(Instr->value_op_begin(), Instr->value_op_end()));
+}
+
 /// Hoist any common code in the successor blocks up into the block. This
 /// function guarantees that BB dominates all successors. If EqTermsOnly is
 /// given, only perform hoisting in case both blocks only contain a terminator.
@@ -1533,12 +1544,11 @@ static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2,
 /// added.
 bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
                                                    bool EqTermsOnly) {
-  // This does very trivial matching, with limited scanning, to find identical
-  // instructions in the two blocks. In particular, we don't want to get into
-  // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
-  // such, we currently just scan for obviously identical instructions in an
-  // identical order, possibly separated by the same number of non-identical
-  // instructions.
+  // We first sort successors based on the number of instructions each block
+  // holds. Then for each successor we make a hashmap from its instructions,
+  // except for the first successor. After that, we iterate over the
+  // instructions of the first successor. If we find identical instructions from
+  // every other successor, we hoist all of them into the predecessor.
   unsigned int SuccSize = succ_size(BB);
   if (SuccSize < 2)
     return false;
@@ -1552,10 +1562,21 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
 
   auto *TI = BB->getTerminator();
 
+  SmallVector<BasicBlock *> SuccessorBlocks;
+  for (auto *Succ : successors(BB))
+    SuccessorBlocks.push_back(Succ);
+
+  // Sort successor blocks based on the number of instructions.
+  // This is because we always want to iterate over instructions
+  // of the smallest block.
+  llvm::stable_sort(SuccessorBlocks, [](BasicBlock *BB1, BasicBlock *BB2) {
+    return BB1->sizeWithoutDebug() < BB2->sizeWithoutDebug();
+  });
+
   // The second of pair is a SkipFlags bitmask.
   using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
   SmallVector<SuccIterPair, 8> SuccIterPairs;
-  for (auto *Succ : successors(BB)) {
+  for (auto *Succ : SuccessorBlocks) {
     BasicBlock::iterator SuccItr = Succ->begin();
     if (isa<PHINode>(*SuccItr))
       return false;
@@ -1589,80 +1610,121 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
   }
 
   bool Changed = false;
+  auto *SuccIterPairBegin = SuccIterPairs.begin();
+  SuccIterPairBegin++;
+  auto OtherSuccIterPairRange =
+      iterator_range(SuccIterPairBegin, SuccIterPairs.end());
+  auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
+  using InstrFlagPair = std::pair<Instruction *, unsigned>;
+  SmallVector<DenseMap<llvm::hash_code, InstrFlagPair>, 2> OtherSuccessorsHash;
+
+  for (auto BBItrPair : OtherSuccIterRange) {
+    // Fill the hashmap for every other successor
+    DenseMap<llvm::hash_code, InstrFlagPair> hashMap;
+    unsigned skipFlag = 0;
+    Instruction *I = nullptr;
+    do {
+      I = &*BBItrPair;
+      skipFlag |= skippedInstrFlags(I);
+      hashMap[getHash(I)] = InstrFlagPair(I, skipFlag);
+      BBItrPair++;
+    } while (!I->isTerminator());
+    OtherSuccessorsHash.push_back(hashMap);
+  }
 
+  // Keep track of instructions skipped in the first successor
+  unsigned SkipFlagsBB1 = 0;
+  bool SameLevelHoist = true;
   for (;;) {
     auto *SuccIterPairBegin = SuccIterPairs.begin();
     auto &BB1ItrPair = *SuccIterPairBegin++;
     auto OtherSuccIterPairRange =
         iterator_range(SuccIterPairBegin, SuccIterPairs.end());
-    auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
-
     Instruction *I1 = &*BB1ItrPair.first;
     auto *BB1 = I1->getParent();
-
-    // Skip debug info if it is not identical.
-    bool AllDbgInstsAreIdentical = all_of(OtherSuccIterRange, [I1](auto &Iter) {
-      Instruction *I2 = &*Iter;
-      return I1->isIdenticalToWhenDefined(I2);
-    });
-    if (!AllDbgInstsAreIdentical) {
-      while (isa<DbgInfoIntrinsic>(I1))
-        I1 = &*++BB1ItrPair.first;
-      for (auto &SuccIter : OtherSuccIterRange) {
-        Instruction *I2 = &*SuccIter;
-        while (isa<DbgInfoIntrinsic>(I2))
-          I2 = &*++SuccIter;
+    bool HasIdenticalInst = true;
+
+    // Check if there are identical instructions in all other successors
+    for (auto &map : OtherSuccessorsHash) {
+      Instruction *I2 = map[getHash(I1)].first;
+      // We might face with same hash values for different instructions.
+      // If that happens, ignore the instruction.
+      if (!I2 || !I1->isIdenticalTo(I2)) {
+        HasIdenticalInst = false;
+        break;
       }
     }
 
-    bool AllInstsAreIdentical = true;
-    bool HasTerminator = I1->isTerminator();
-    for (auto &SuccIter : OtherSuccIterRange) {
-      Instruction *I2 = &*SuccIter;
-      HasTerminator |= I2->isTerminator();
-      if (AllInstsAreIdentical && !I1->isIdenticalToWhenDefined(I2))
-        AllInstsAreIdentical = false;
+    if (!HasIdenticalInst) {
+      if (NumSkipped >= HoistCommonSkipLimit)
+        return Changed;
+      SkipFlagsBB1 |= skippedInstrFlags(I1);
+      if (SameLevelHoist) {
+        for (auto &SuccIterPair : OtherSuccIterPairRange) {
+          Instruction *I = &*SuccIterPair.first++;
+          SuccIterPair.second |= skippedInstrFlags(I);
+        }
+      }
+      NumSkipped++;
+      if (I1->isTerminator())
+        return Changed;
+      ++BB1ItrPair.first;
+      continue;
     }
 
     // If we are hoisting the terminator instruction, don't move one (making a
     // broken BB), instead clone it, and remove BI.
-    if (HasTerminator) {
+    if (I1->isTerminator()) {
       // Even if BB, which contains only one unreachable instruction, is ignored
       // at the beginning of the loop, we can hoist the terminator instruction.
       // If any instructions remain in the block, we cannot hoist terminators.
-      if (NumSkipped || !AllInstsAreIdentical)
+      if (NumSkipped)
         return Changed;
       SmallVector<Instruction *, 8> Insts;
-      for (auto &SuccIter : OtherSuccIterRange)
-        Insts.push_back(&*SuccIter);
+      for (auto &map : OtherSuccessorsHash) {
+        Instruction *I2 = map[getHash(I1)].first;
+        // BB holding I2 should only contain the branch instruction
+        auto itr = I2->getParent()->instructionsWithoutDebug();
+        if (&*itr.begin() != I2)
+          return Changed;
+        Insts.push_back(I2);
+      }
       return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, Insts) || Changed;
     }
 
-    if (AllInstsAreIdentical) {
-      unsigned SkipFlagsBB1 = BB1ItrPair.second;
-      AllInstsAreIdentical =
-          isSafeToHoistInstr(I1, SkipFlagsBB1) &&
-          all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
-            Instruction *I2 = &*Pair.first;
-            unsigned SkipFlagsBB2 = Pair.second;
-            // Even if the instructions are identical, it may not
-            // be safe to hoist them if we have skipped over
-            // instructions with side effects or their operands
-            // weren't hoisted.
-            return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
-                   shouldHoistCommonInstructions(I1, I2, TTI);
-          });
+    bool SafeToHoist = isSafeToHoistInstr(I1, SkipFlagsBB1);
+    unsigned index = 0;
+    for (auto &SuccIterPair : OtherSuccIterPairRange) {
+      Instruction *I2 = OtherSuccessorsHash[index][getHash(I1)].first;
+      // If instructions of all successors are at the same level, use the
+      // skipFlag of its BB, i.e., SameLevelHoist. Otherwise, use the skipFlag
+      // that was calculated initially for this instruction in the hashmap
+      if (SameLevelHoist && I2 == (&*(SuccIterPair.first))) {
+        SafeToHoist = SafeToHoist &&
+                      isSafeToHoistInstr(I2, SuccIterPair.second) &&
+                      shouldHoistCommonInstructions(I1, I2, TTI);
+      } else {
+        unsigned skipFlag = OtherSuccessorsHash[index][getHash(I1)].second;
+        SafeToHoist = SafeToHoist && isSafeToHoistInstr(I2, skipFlag) &&
+                      shouldHoistCommonInstructions(I1, I2, TTI);
+        SameLevelHoist = false;
+      }
+      index++;
     }
 
-    if (AllInstsAreIdentical) {
+    if (SafeToHoist) {
       BB1ItrPair.first++;
+      if (SameLevelHoist) {
+        for (auto &SuccIterPair : OtherSuccIterPairRange)
+          SuccIterPair.first++;
+      }
       if (isa<DbgInfoIntrinsic>(I1)) {
         // The debug location is an integral part of a debug info intrinsic
         // and can't be separated from it or replaced.  Instead of attempting
         // to merge locations, simply hoist both copies of the intrinsic.
         I1->moveBeforePreserving(TI);
-        for (auto &SuccIter : OtherSuccIterRange) {
-          auto *I2 = &*SuccIter++;
+        for (auto &map : OtherSuccessorsHash) {
+          Instruction *I2 = map[getHash(I1)].first;
           assert(isa<DbgInfoIntrinsic>(I2));
           I2->moveBeforePreserving(TI);
         }
@@ -1672,8 +1734,8 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
         // we remove the now redundant second instruction.
         I1->moveBeforePreserving(TI);
         BB->splice(TI->getIterator(), BB1, I1->getIterator());
-        for (auto &SuccIter : OtherSuccIterRange) {
-          Instruction *I2 = &*SuccIter++;
+        for (auto &map : OtherSuccessorsHash) {
+          Instruction *I2 = map[getHash(I1)].first;
           assert(I2 != I1);
           if (!I2->use_empty())
             I2->replaceAllUsesWith(I1);
@@ -1695,9 +1757,12 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
       // We are about to skip over a pair of non-identical instructions. Record
       // if any have characteristics that would prevent reordering instructions
       // across them.
-      for (auto &SuccIterPair : SuccIterPairs) {
-        Instruction *I = &*SuccIterPair.first++;
-        SuccIterPair.second |= skippedInstrFlags(I);
+      SkipFlagsBB1 |= skippedInstrFlags(I1);
+      if (SameLevelHoist) {
+        for (auto &SuccIterPair : OtherSuccIterPairRange) { // update flags
+          Instruction *I = &*SuccIterPair.first;
+          SuccIterPair.second |= skippedInstrFlags(I);
+        }
       }
       ++NumSkipped;
     }
@@ -1741,7 +1806,6 @@ bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
         Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
         if (BB1V == BB2V)
           continue;
-
         // In the case of an if statement, check for
         // passingValueIsAlwaysUndefined here because we would rather eliminate
         // undefined control flow then converting it to a select.
@@ -1810,20 +1874,16 @@ bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
       }
     }
   }
-
   SmallVector<DominatorTree::UpdateType, 4> Updates;
-
   // Update any PHI nodes in our new successors.
   for (BasicBlock *Succ : successors(BB1)) {
     AddPredecessorToBlock(Succ, TIParent, BB1);
     if (DTU)
       Updates.push_back({DominatorTree::Insert, TIParent, Succ});
   }
-
   if (DTU)
     for (BasicBlock *Succ : successors(TI))
       Updates.push_back({DominatorTree::Delete, TIParent, Succ});
-
   EraseTerminatorAndDCECond(TI);
   if (DTU)
     DTU->applyUpdates(Updates);
diff --git a/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll b/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll
index bfe31d8345d506..285062455e4f5f 100644
--- a/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll
+++ b/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll
@@ -24,6 +24,58 @@ F:              ; preds = %0
   ret void
 }
 
+define void @test_unordered(ptr noalias %b, ptr noalias %c, ptr noalias %Q, ptr noalias %R, i32 %i) {
+; CHECK-LABEL: @test_unordered(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LDR:%.*]] = load i32, ptr [[R:%.*]], align 8
+; CHECK-NEXT:    switch i32 %i, label %bb0 [
+; CHECK-NEXT:      i32 2, label %bb1
+; CHECK-NEXT:      i32 3, label %bb2
+; CHECK-NEXT:    ]
+; CHECK:       common.ret:
+; CHECK-NEXT:    ret void
+; CHECK:       bb0:
+; CHECK-NEXT:    [[LDQ:%.*]] = load i32, ptr [[Q:%.*]], align 8
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[LDQ]], 2
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LDR]], [[MUL]]
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[C:%.*]], align 8
+; CHECK-NEXT:    br label [[COMMON_RET:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    store i32 [[LDR]], ptr [[C]], align 4
+; CHECK-NEXT:    br label [[COMMON_RET]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[LDQ2:%.*]] = load i32, ptr [[Q]], align 8
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[LDR]], [[LDQ2]]
+; CHECK-NEXT:    store i32 [[SUB]], ptr [[C]], align 8
+; CHECK-NEXT:    br label [[COMMON_RET]]
+; The load from %R is in every successor, at a different position in each.
+entry:
+  switch i32 %i, label %bb0 [
+    i32 2, label %bb1
+    i32 3, label %bb2
+  ]
+
+bb0:                                          ; preds = %entry
+  %ldQ1 = load i32, ptr %Q, align 8
+  %mul = mul i32 %ldQ1, 2
+  %ldR1 = load i32, ptr %R, align 8
+  %add = add i32 %ldR1, %mul
+  store i32 %add, ptr %c, align 8
+  ret void
+
+bb1:                                          ; preds = %entry
+  %ldR2 = load i32, ptr %R, align 8
+  store i32 %ldR2, ptr %c
+  ret void
+
+bb2:                                          ; preds = %entry
+  %ldQ2 = load i32, ptr %Q, align 8
+  %ldR3 = load i32, ptr %R, align 8
+  %sub = sub i32 %ldR3, %ldQ2
+  store i32 %sub, ptr %c, align 8
+  ret void
+}
+
 define void @test_switch(i64 %i, ptr %Q) {
 ; CHECK-LABEL: @test_switch(
 ; CHECK-NEXT:  common.ret:

>From bc382998907f28f32ec57c8553cb61e9328e258a Mon Sep 17 00:00:00 2001
From: RouzbehPaktinat <rouzbeh.paktinat1 at huawei.com>
Date: Wed, 24 Jan 2024 15:51:56 -0500
Subject: [PATCH 2/2] [SimplifyCFG] Fix bugs and address reviews

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp     | 47 +++++-----
 .../PhaseOrdering/simplifyCFG-hoist.ll        | 85 +++++++++++++++++++
 2 files changed, 113 insertions(+), 19 deletions(-)
 create mode 100644 llvm/test/Transforms/PhaseOrdering/simplifyCFG-hoist.ll

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 0611b581c145e9..12c39110569b7e 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1562,27 +1562,39 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
 
   auto *TI = BB->getTerminator();
 
-  SmallVector<BasicBlock *> SuccessorBlocks;
-  for (auto *Succ : successors(BB))
-    SuccessorBlocks.push_back(Succ);
+  SmallVector<BasicBlock *, 8> SuccessorBBs;
+  for (auto *Succ : successors(BB)) {
+    BasicBlock::iterator SuccItr = Succ->begin();
+    // If we find an unreachable instruction at the beginning of a basic block,
+    // we can still hoist instructions from the rest of the basic blocks.
+    if (isa<UnreachableInst>(*SuccItr))
+      continue;
+    SuccessorBBs.push_back(Succ);
+  }
 
-  // Sort successor blocks based on the number of instructions.
-  // This is because we always want to iterate over instructions
-  // of the smallest block.
-  llvm::stable_sort(SuccessorBlocks, [](BasicBlock *BB1, BasicBlock *BB2) {
-    return BB1->sizeWithoutDebug() < BB2->sizeWithoutDebug();
-  });
+  // Find the smallest BB because we always want to iterate over instructions
+  // of the smallest Successor.
+  auto *SmallestBB = *std::min_element(SuccessorBBs.begin(), SuccessorBBs.end(),
+                                       [](BasicBlock *BB1, BasicBlock *BB2) {
+                                         return BB1->size() < BB2->size();
+                                       });
+  std::iter_swap(
+      SuccessorBBs.begin(),
+      std::find(SuccessorBBs.begin(), SuccessorBBs.end(), SmallestBB));
 
   // The second of pair is a SkipFlags bitmask.
   using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
   SmallVector<SuccIterPair, 8> SuccIterPairs;
-  for (auto *Succ : SuccessorBlocks) {
+  for (auto *Succ : SuccessorBBs) {
     BasicBlock::iterator SuccItr = Succ->begin();
     if (isa<PHINode>(*SuccItr))
       return false;
     SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
   }
 
+  if (SuccIterPairs.size() < 2)
+    return false;
+
   // Check if only hoisting terminators is allowed. This does not add new
   // instructions to the hoist location.
   if (EqTermsOnly) {
@@ -1600,14 +1612,6 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
   // many instructions we skip, serving as a compilation time control as well as
   // preventing excessive increase of life ranges.
   unsigned NumSkipped = 0;
-  // If we find an unreachable instruction at the beginning of a basic block, we
-  // can still hoist instructions from the rest of the basic blocks.
-  if (SuccIterPairs.size() > 2) {
-    erase_if(SuccIterPairs,
-             [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
-    if (SuccIterPairs.size() < 2)
-      return false;
-  }
 
   bool Changed = false;
   auto *SuccIterPairBegin = SuccIterPairs.begin();
@@ -1649,7 +1653,7 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
       Instruction *I2 = map[getHash(I1)].first;
       // We might face with same hash values for different instructions.
       // If that happens, ignore the instruction.
-      if (!I2 || !I1->isIdenticalTo(I2)) {
+      if (!I2 || !I1->isIdenticalToWhenDefined(I2)) {
         HasIdenticalInst = false;
         break;
       }
@@ -1744,6 +1748,7 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
           // I1 and I2 are being combined into a single instruction.  Its debug
           // location is the merged locations of the original instructions.
           I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
+          map.erase(getHash(I1));
           I2->eraseFromParent();
         }
       }
@@ -1874,16 +1879,20 @@ bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
       }
     }
   }
+
   SmallVector<DominatorTree::UpdateType, 4> Updates;
+
   // Update any PHI nodes in our new successors.
   for (BasicBlock *Succ : successors(BB1)) {
     AddPredecessorToBlock(Succ, TIParent, BB1);
     if (DTU)
       Updates.push_back({DominatorTree::Insert, TIParent, Succ});
   }
+
   if (DTU)
     for (BasicBlock *Succ : successors(TI))
       Updates.push_back({DominatorTree::Delete, TIParent, Succ});
+
   EraseTerminatorAndDCECond(TI);
   if (DTU)
     DTU->applyUpdates(Updates);
diff --git a/llvm/test/Transforms/PhaseOrdering/simplifyCFG-hoist.ll b/llvm/test/Transforms/PhaseOrdering/simplifyCFG-hoist.ll
new file mode 100644
index 00000000000000..1f0d25ae3f6db4
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/simplifyCFG-hoist.ll
@@ -0,0 +1,85 @@
+; RUN: opt -passes='default<O3>' -S --mtriple=aarch64-linux-gnu --mcpu=a64fx < %s | FileCheck %s
+; REQUIRES: aarch64-registered-target
+; Hoist identical instructions from successor blocks even if
+; they are not located at the same position in each block. This can
+; help generate more compact vectorized code.
+
+define void @hoist_then_vectorize(ptr %a, ptr %b, ptr %c, ptr %d, i32 %N){
+; CHECK-LABEL: @hoist_then_vectorize(
+; CHECK-NEXT:  iter.check:
+; CHECK-NEXT:    [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[SHIFT:%.*]] = shl i64 [[VSCALE:%.*]], 1
+; CHECK-NEXT:    [[MIN_ITR:%.*]] = icmp ugt i64 [[SHIFT:%.*]], 20
+; CHECK-NEXT:    br i1 [[MIN_ITR:%.*]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_MAIN_LOOP_ITR_CHECK:%.*]]
+; CHECK:       vector.main.loop.iter.check:
+; CHECK-NEXT:    [[VSCALE2:%.*]] = tail call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[SHIFT2:%.*]] = shl i64 [[VSCALE2:%.*]], 2
+; CHECK-NEXT:    [[MIN_ITR2:%.*]] = icmp ugt i64 [[SHIFT2:%.*]], 20
+; CHECK-NEXT:    br i1 [[MIN_ITR2:%.*]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[VSCALE3:%.*]] = tail call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[SHIFT3:%.*]] = shl i64 [[VSCALE3:%.*]], 2
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 20, [[SHIFT3:%.*]]
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub nuw nsw i64 20, [[N_MOD_VF:%.*]]
+; CHECK-NEXT:    [[VSCALE4:%.*]] = tail call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[SHIFT4:%.*]] = shl i64 [[VSCALE4:%.*]], 2
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]]  ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY:%.*]] ]
+; CHECK-NEXT:    [[GEP_D:%.*]]  = getelementptr inbounds i32, ptr [[D:%.*]], i64 [[INDEX:%.*]]
+; CHECK-NEXT:    [[LOAD_D:%.*]] = load <vscale x 4 x i32>, ptr [[GEP_D:%.*]], align 4
+; CHECK-NEXT:    [[MASK1:%.*]] = icmp slt <vscale x 4 x i32> [[LOAD_D:%.*]], zeroinitializer
+; CHECK-NEXT:    [[GEP_A:%.*]]  = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX:%.*]]
+; CHECK-NEXT:    [[LOAD_A:%.*]] = load <vscale x 4 x i32>, ptr [[GEP_A:%.*]], align 4
+; CHECK-NEXT:    [[MASK2:%.*]] = icmp eq <vscale x 4 x i32> [[LOAD_A:%.*]], zeroinitializer
+; CHECK-NEXT:    [[SEL1:%.*]] = select <vscale x 4 x i1> [[MASK2:%.*]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[SEL2:%.*]] = select <vscale x 4 x i1> [[MASK1:%.*]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[SEL1:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[LOAD_A:%.*]], [[SEL2:%.*]]
+; CHECK-NEXT:    store <vscale x 4 x i32> [[ADD:%.*]], ptr [[GEP_A:%.*]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT:%.*]] = add nuw i64 [[INDEX:%.*]], [[SHIFT4:%.*]]
+; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp eq i64 [[INDEX_NEXT:%.*]], [[N_VEC:%.*]]
+; CHECK-NEXT:    br i1 [[LOOP_COND:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY:%.*]]
+; if.then, if.then9 and if.else15 all begin with the same getelementptr and load from %a.
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.inc
+  ret void
+
+for.body:                                         ; preds = %entry, %for.inc
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
+  %arrayidx = getelementptr inbounds i32, ptr %d, i64 %indvars.iv
+  %ldr_d = load i32, ptr %arrayidx, align 4
+  %cmp1 = icmp slt i32 %ldr_d, 0
+  br i1 %cmp1, label %if.then, label %if.else
+
+if.then:                                          ; preds = %for.body
+  %arrayidx3 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %ldr_a = load i32, ptr %arrayidx3, align 4
+  %add33 = add i32 %ldr_a, 1
+  store i32 %add33, ptr %arrayidx3, align 4
+  br label %for.inc
+
+if.else:                                          ; preds = %for.body
+  %cmp7 = icmp eq i32 %ldr_d, 0
+  br i1 %cmp7, label %if.then9, label %if.else15
+
+if.then9:                                         ; preds = %if.else
+  %arrayidx11 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %ldr_a2 = load i32, ptr %arrayidx11, align 4
+  %add1334 = add i32 %ldr_a2, 2
+  store i32 %add1334, ptr %arrayidx11, align 4
+  br label %for.inc
+
+if.else15:                                        ; preds = %if.else
+  %arrayidx112 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %ldr_a3 = load i32, ptr %arrayidx112, align 4
+  %add1935 = add i32 %ldr_a3, 3
+  store i32 %add1935, ptr %arrayidx112, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.then, %if.else15, %if.then9
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 20
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}



More information about the llvm-commits mailing list