[llvm] r284631 - [LV] Avoid emitting trivially dead instructions

Matthew Simpson via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 19 12:22:02 PDT 2016


Author: mssimpso
Date: Wed Oct 19 14:22:02 2016
New Revision: 284631

URL: http://llvm.org/viewvc/llvm-project?rev=284631&view=rev
Log:
[LV] Avoid emitting trivially dead instructions

Some instructions from the original loop, when vectorized, can become trivially
dead. This happens because of the way we structure the new loop. For example,
we create new induction variables and induction variable "steps" in the new
loop. Thus, when we go to vectorize the original induction variable update, it
may no longer be needed due to the instructions we've already created. This
patch prevents us from creating these redundant instructions. This reduces code
size before simplification and allows greater flexibility in code generation
since we have fewer unnecessary instruction uses.

Differential Revision: https://reviews.llvm.org/D25631

Added:
    llvm/trunk/test/Transforms/LoopVectorize/dead_instructions.ll
Modified:
    llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp

Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=284631&r1=284630&r2=284631&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Wed Oct 19 14:22:02 2016
@@ -441,6 +441,10 @@ protected:
   /// respective conditions.
   void predicateInstructions();
 
+  /// Collect the instructions from the original loop that would be trivially
+  /// dead in the vectorized loop if generated.
+  void collectTriviallyDeadInstructions();
+
   /// Shrinks vector element sizes to the smallest bitwidth they can be legally
   /// represented as.
   void truncateToMinimalBitwidths();
@@ -763,6 +767,14 @@ protected:
 
   // Record whether runtime checks are added.
   bool AddedSafetyChecks;
+
+  // Holds instructions from the original loop whose counterparts in the
+  // vectorized loop would be trivially dead if generated. For example,
+  // original induction update instructions can become dead because we
+  // separately emit induction "steps" when generating code for the new loop.
+  // Similarly, we create a new latch condition when setting up the structure
+  // of the new loop, so the old one can become dead.
+  SmallPtrSet<Instruction *, 4> DeadInstructions;
 };
 
 class InnerLoopUnroller : public InnerLoopVectorizer {
@@ -3802,6 +3814,11 @@ void InnerLoopVectorizer::vectorizeLoop(
   // are vectorized, so we can use them to construct the PHI.
   PhiVector PHIsToFix;
 
+  // Collect instructions from the original loop that will become trivially
+  // dead in the vectorized loop. We don't need to vectorize these
+  // instructions.
+  collectTriviallyDeadInstructions();
+
   // Scan the loop in a topological order to ensure that defs are vectorized
   // before users.
   LoopBlocksDFS DFS(OrigLoop);
@@ -4209,6 +4226,29 @@ void InnerLoopVectorizer::fixLCSSAPHIs()
   }
 }
 
+void InnerLoopVectorizer::collectTriviallyDeadInstructions() {
+  BasicBlock *Latch = OrigLoop->getLoopLatch(); // Assumed non-null below.
+
+  // We create new control-flow for the vectorized loop, so the original latch
+  // condition will be dead after vectorization if the latch terminator is its
+  // only use. Insert it first: the induction scan below checks this set.
+  auto *Cmp = dyn_cast<Instruction>(Latch->getTerminator()->getOperand(0));
+  if (Cmp && Cmp->hasOneUse())
+    DeadInstructions.insert(Cmp);
+
+  // We create new "steps" for induction variable updates to which the original
+  // induction variables map. An original update (the PHI's incoming value from
+  // the latch) is dead if each user is that PHI or is already in the dead set.
+  for (auto &Induction : *Legal->getInductionVars()) {
+    PHINode *Ind = Induction.first;
+    auto *IndUpdate = cast<Instruction>(Ind->getIncomingValueForBlock(Latch));
+    if (all_of(IndUpdate->users(), [&](User *U) -> bool {
+          return U == Ind || DeadInstructions.count(cast<Instruction>(U));
+        }))
+      DeadInstructions.insert(IndUpdate);
+  }
+}
+
 void InnerLoopVectorizer::predicateInstructions() {
 
   // For each instruction I marked for predication on value C, split I into its
@@ -4536,6 +4576,11 @@ void InnerLoopVectorizer::vectorizeBlock
   // For each instruction in the old loop.
   for (Instruction &I : *BB) {
 
+    // If the instruction will become trivially dead when vectorized, we don't
+    // need to generate it.
+    if (DeadInstructions.count(&I))
+      continue;
+
     // Scalarize instructions that should remain scalar after vectorization.
     if (!(isa<BranchInst>(&I) || isa<PHINode>(&I) ||
           isa<DbgInfoIntrinsic>(&I)) &&

Added: llvm/trunk/test/Transforms/LoopVectorize/dead_instructions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/dead_instructions.ll?rev=284631&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/dead_instructions.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/dead_instructions.ll Wed Oct 19 14:22:02 2016
@@ -0,0 +1,42 @@
+; RUN: opt < %s -force-vector-width=2 -force-vector-interleave=2 -loop-vectorize -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
+; CHECK-LABEL: @dead_instructions_01
+;
+; This test ensures that we don't generate trivially dead instructions prior to
+; instruction simplification. We don't need to generate instructions
+; corresponding to the original induction variable update or branch condition,
+; since we rewrite the loop structure.
+;
+; CHECK:     vector.body:
+; CHECK:       %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK:       %[[I0:.+]] = add i64 %index, 0
+; CHECK:       %[[I2:.+]] = add i64 %index, 2
+; CHECK:       getelementptr inbounds i64, i64* %a, i64 %[[I0]]
+; CHECK:       getelementptr inbounds i64, i64* %a, i64 %[[I2]]
+; CHECK-NOT:   add nuw nsw i64 %[[I0]], 1
+; CHECK-NOT:   add nuw nsw i64 %[[I2]], 1
+; CHECK-NOT:   icmp slt i64 {{.*}}, %n
+; CHECK:       %index.next = add i64 %index, 4
+; CHECK:       %[[CMP:.+]] = icmp eq i64 %index.next, %n.vec
+; CHECK:       br i1 %[[CMP]], label %middle.block, label %vector.body
+;
+define i64 @dead_instructions_01(i64 *%a, i64 %n) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
+  %r = phi i64 [ %tmp2, %for.body ], [ 0, %entry ]
+  %tmp0 = getelementptr inbounds i64, i64* %a, i64 %i
+  %tmp1 = load i64, i64* %tmp0, align 8
+  %tmp2 = add i64 %tmp1, %r
+  %i.next = add nuw nsw i64 %i, 1
+  %cond = icmp slt i64 %i.next, %n
+  br i1 %cond, label %for.body, label %for.end
+
+for.end:
+  %tmp3  = phi i64 [ %tmp2, %for.body ]
+  ret i64 %tmp3
+}




More information about the llvm-commits mailing list