[llvm] r185122 - LoopVectorize: Preserve debug location info

Thu Jun 27 17:38:54 PDT 2013

Author: arnolds
Date: Thu Jun 27 19:38:54 2013
New Revision: 185122

URL: http://llvm.org/viewvc/llvm-project?rev=185122&view=rev
Log:
LoopVectorize: Preserve debug location info

radar://14169017

Added:
    llvm/trunk/test/Transforms/LoopVectorize/debugloc.ll
Modified:
    llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp

Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=185122&r1=185121&r2=185122&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Thu Jun 27 19:38:54 2013
@@ -326,6 +326,49 @@ private:
   EdgeMaskCache MaskCache;
 };
 
+/// \brief Set/reset the debug location in the IR builder using the RAII idiom.
+class DebugLocSetter {
+  IRBuilder<> &Builder;
+  DebugLoc OldDL; // Saved at construction, restored in the destructor.
+
+  DebugLocSetter(const DebugLocSetter&); // Private and bodiless: no copying.
+  DebugLocSetter &operator=(const DebugLocSetter&);
+
+public:
+  /// \brief Set the debug location in the IRBuilder 'B' using the instruction
+  /// 'Inst'.
+  DebugLocSetter(IRBuilder<> &B, Instruction *Inst) : Builder(B) {
+    OldDL = Builder.getCurrentDebugLocation();
+    // Handle null instructions gracefully. This is so we can use a dyn_cast on
+    // values without knowing it is an instruction.
+    if (Inst)
+      Builder.SetCurrentDebugLocation(Inst->getDebugLoc());
+  }
+
+  ~DebugLocSetter() {
+    Builder.SetCurrentDebugLocation(OldDL);
+  }
+};
+
+/// \brief Look for a meaningful debug location on the instruction or its
+/// operands.
+static Instruction *getDebugLocFromInstOrOperands(Instruction *I) {
+  if (!I)
+    return I; // Null in, null out.
+
+  DebugLoc Empty; // Default-constructed value to compare against.
+  if (I->getDebugLoc() != Empty)
+    return I;
+
+  for (User::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE; ++OI) {
+    if (Instruction *OpInst = dyn_cast<Instruction>(*OI))
+      if (OpInst->getDebugLoc() != Empty)
+        return OpInst;
+  }
+
+  return I; // No operand differs from Empty either; fall back to I itself.
+}
+
 /// \brief Check if conditionally executed loads are hoistable.
 ///
 /// This class has two functions: isHoistableLoad and canHoistAllLoads.
@@ -1195,6 +1238,7 @@ void InnerLoopVectorizer::vectorizeMemor
   // Handle consecutive loads/stores.
   GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
   if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) {
+    DebugLocSetter SetDL(Builder, Gep);
     Value *PtrOperand = Gep->getPointerOperand();
     Value *FirstBasePtr = getVectorValue(PtrOperand)[0];
     FirstBasePtr = Builder.CreateExtractElement(FirstBasePtr, Zero);
@@ -1205,6 +1249,7 @@ void InnerLoopVectorizer::vectorizeMemor
     Gep2->setName("gep.indvar.base");
     Ptr = Builder.Insert(Gep2);
   } else if (Gep) {
+    DebugLocSetter SetDL(Builder, Gep);
     assert(SE->isLoopInvariant(SE->getSCEV(Gep->getPointerOperand()),
                                OrigLoop) && "Base ptr must be invariant");
 
@@ -1237,6 +1282,7 @@ void InnerLoopVectorizer::vectorizeMemor
   } else {
     // Use the induction element ptr.
     assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
+    DebugLocSetter SetDL(Builder, cast<Instruction>(Ptr));
     VectorParts &PtrVal = getVectorValue(Ptr);
     Ptr = Builder.CreateExtractElement(PtrVal[0], Zero);
   }
@@ -1245,6 +1291,7 @@ void InnerLoopVectorizer::vectorizeMemor
   if (SI) {
     assert(!Legal->isUniform(SI->getPointerOperand()) &&
            "We do not allow storing to uniform addresses");
+    DebugLocSetter SetDL(Builder, SI);
     // We don't want to update the value in the map as it might be used in
     // another expression. So don't use a reference type for "StoredVal".
     VectorParts StoredVal = getVectorValue(SI->getValueOperand());
@@ -1269,6 +1316,9 @@ void InnerLoopVectorizer::vectorizeMemor
     return;
   }
 
+  // Handle loads.
+  assert(LI && "Must have a load instruction");
+  DebugLocSetter SetDL(Builder, LI);
   for (unsigned Part = 0; Part < UF; ++Part) {
     // Calculate the pointer for the specific unroll-part.
     Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
@@ -1292,6 +1342,8 @@ void InnerLoopVectorizer::scalarizeInstr
   // Holds vector parameters or scalars, in case of uniform vals.
   SmallVector<VectorParts, 4> Params;
 
+  DebugLocSetter SetDL(Builder, Instr);
+
   // Find all of the vectorized parameters.
   for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
     Value *SrcOp = Instr->getOperand(op);
@@ -1519,6 +1571,7 @@ InnerLoopVectorizer::createEmptyLoop(Loo
   Builder.SetInsertPoint(VecBody->getFirstInsertionPt());
 
   // Generate the induction variable.
+  DebugLocSetter SetDL(Builder, getDebugLocFromInstOrOperands(OldInduction));
   Induction = Builder.CreatePHI(IdxTy, 2, "index");
   // The loop step is equal to the vectorization factor (num of SIMD elements)
   // times the unroll factor (num of SIMD instructions).
@@ -1527,6 +1580,8 @@ InnerLoopVectorizer::createEmptyLoop(Loo
   // This is the IR builder that we use to add all of the logic for bypassing
   // the new vector loop.
   IRBuilder<> BypassBuilder(BypassBlock->getTerminator());
+  DebugLocSetter SetDLByPass(BypassBuilder,
+                             getDebugLocFromInstOrOperands(OldInduction));
 
   // We may need to extend the index in case there is a type mismatch.
   // We know that the count starts at zero and does not overflow.
@@ -2066,6 +2121,8 @@ InnerLoopVectorizer::vectorizeLoop(LoopV
     for (unsigned part = 0; part < UF; ++part) {
       // This PHINode contains the vectorized reduction variable, or
       // the initial value vector, if we bypass the vector loop.
+      DebugLocSetter SetDL(Builder, RdxDesc.LoopExitInstr);
+
       VectorParts &RdxExitVal = getVectorValue(RdxDesc.LoopExitInstr);
       PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi");
       Value *StartVal = (part == 0) ? VectorStart : Identity;
@@ -2079,6 +2136,8 @@ InnerLoopVectorizer::vectorizeLoop(LoopV
     Value *ReducedPartRdx = RdxParts[0];
     unsigned Op = getReductionBinOp(RdxDesc.Kind);
     for (unsigned part = 1; part < UF; ++part) {
+      DebugLocSetter SetDL(Builder, dyn_cast<Instruction>(RdxParts[part]));
+
       if (Op != Instruction::ICmp && Op != Instruction::FCmp)
         ReducedPartRdx = Builder.CreateBinOp((Instruction::BinaryOps)Op,
                                              RdxParts[part], ReducedPartRdx,
@@ -2096,6 +2155,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopV
     Value *TmpVec = ReducedPartRdx;
     SmallVector<Constant*, 32> ShuffleMask(VF, 0);
     for (unsigned i = VF; i != 1; i >>= 1) {
+      DebugLocSetter SetDL(Builder, dyn_cast<Instruction>(ReducedPartRdx));
       // Move the upper half of the vector to the lower half.
       for (unsigned j = 0; j != i/2; ++j)
         ShuffleMask[j] = Builder.getInt32(i/2 + j);
@@ -2118,7 +2178,11 @@ InnerLoopVectorizer::vectorizeLoop(LoopV
     }
 
     // The result is in the first element of the vector.
-    Value *Scalar0 = Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
+    Value *Scalar0;
+    {
+      DebugLocSetter SetDL(Builder, dyn_cast<Instruction>(ReducedPartRdx));
+      Scalar0 = Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
+    }
 
     // Now, we need to fix the users of the reduction variable
     // inside and outside of the scalar remainder loop.
@@ -2253,6 +2317,7 @@ InnerLoopVectorizer::vectorizeBlockInLoo
 
       // Check for PHI nodes that are lowered to vector selects.
       if (P->getParent() != OrigLoop->getHeader()) {
+        DebugLocSetter SetDL(Builder, P);
         // We know that all PHIs in non header blocks are converted into
         // selects, so we don't have to worry about the insertion order and we
         // can just use the builder.
@@ -2295,6 +2360,8 @@ InnerLoopVectorizer::vectorizeBlockInLoo
       LoopVectorizationLegality::InductionInfo II =
         Legal->getInductionVars()->lookup(P);
 
+      DebugLocSetter SetDL(Builder, P);
+
       switch (II.IK) {
       case LoopVectorizationLegality::IK_NoInduction:
         llvm_unreachable("Unknown induction");
@@ -2402,6 +2469,7 @@ InnerLoopVectorizer::vectorizeBlockInLoo
     case Instruction::Xor: {
       // Just widen binops.
       BinaryOperator *BinOp = dyn_cast<BinaryOperator>(it);
+      DebugLocSetter SetDL(Builder, BinOp);
       VectorParts &A = getVectorValue(it->getOperand(0));
       VectorParts &B = getVectorValue(it->getOperand(1));
 
@@ -2428,6 +2496,7 @@ InnerLoopVectorizer::vectorizeBlockInLoo
       // instruction with a scalar condition. Otherwise, use vector-select.
       bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(it->getOperand(0)),
                                                OrigLoop);
+      DebugLocSetter SetDL(Builder, it);
 
       // The condition can be loop invariant  but still defined inside the
       // loop. This means that we can't just use the original 'cond' value.
@@ -2452,6 +2521,7 @@ InnerLoopVectorizer::vectorizeBlockInLoo
       // Widen compares. Generate vector compares.
       bool FCmp = (it->getOpcode() == Instruction::FCmp);
       CmpInst *Cmp = dyn_cast<CmpInst>(it);
+      DebugLocSetter SetDL(Builder, it);
       VectorParts &A = getVectorValue(it->getOperand(0));
       VectorParts &B = getVectorValue(it->getOperand(1));
       for (unsigned Part = 0; Part < UF; ++Part) {
@@ -2482,6 +2552,7 @@ InnerLoopVectorizer::vectorizeBlockInLoo
     case Instruction::FPTrunc:
     case Instruction::BitCast: {
       CastInst *CI = dyn_cast<CastInst>(it);
+      DebugLocSetter SetDL(Builder, it);
       /// Optimize the special case where the source is the induction
       /// variable. Notice that we can only optimize the 'trunc' case
       /// because: a. FP conversions lose precision, b. sext/zext may wrap,
@@ -2509,6 +2580,8 @@ InnerLoopVectorizer::vectorizeBlockInLoo
       if (isa<DbgInfoIntrinsic>(it))
         break;
 
+      DebugLocSetter SetDL(Builder, it);
+
       Module *M = BB->getParent()->getParent();
       CallInst *CI = cast<CallInst>(it);
       Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);

Added: llvm/trunk/test/Transforms/LoopVectorize/debugloc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/debugloc.ll?rev=185122&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/debugloc.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/debugloc.ll Thu Jun 27 19:38:54 2013
@@ -0,0 +1,92 @@
+; RUN: opt -S < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Make sure we are preserving debug info in the vectorized code.
+
+; CHECK: for.body.lr.ph
+; CHECK:   cmp.zero = icmp eq i64 {{.*}}, 0, !dbg !21
+; CHECK: vector.body
+; CHECK:   index {{.*}}, !dbg !21
+; CHECK:   getelementptr inbounds i32* %a, {{.*}}, !dbg !22
+; CHECK:   load <2 x i32>* {{.*}}, !dbg !22
+; CHECK:   add <2 x i32> {{.*}}, !dbg !22
+; CHECK:   add i64 %index, 2, !dbg !21
+; CHECK:   icmp eq i64 %index.next, %end.idx.rnd.down, !dbg !21
+; CHECK: middle.block
+; CHECK:   add <2 x i32> %rdx.vec.exit.phi, %rdx.shuf, !dbg !22
+; CHECK:   extractelement <2 x i32> %bin.rdx, i32 0, !dbg !22
+
+define i32 @f(i32* nocapture %a, i32 %size) #0 {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i32* %a}, i64 0, metadata !13), !dbg !19
+  tail call void @llvm.dbg.value(metadata !{i32 %size}, i64 0, metadata !14), !dbg !19
+  tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !15), !dbg !20
+  tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !16), !dbg !21
+  %cmp4 = icmp eq i32 %size, 0, !dbg !21
+  br i1 %cmp4, label %for.end, label %for.body.lr.ph, !dbg !21
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body, !dbg !21
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %sum.05 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv, !dbg !22
+  %0 = load i32* %arrayidx, align 4, !dbg !22, !tbaa !23
+  %add = add i32 %0, %sum.05, !dbg !22
+  tail call void @llvm.dbg.value(metadata !{i32 %add.lcssa}, i64 0, metadata !15), !dbg !22
+  %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !21
+  tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !16), !dbg !21
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !21
+  %exitcond = icmp ne i32 %lftr.wideiv, %size, !dbg !21
+  br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge, !dbg !21
+
+for.cond.for.end_crit_edge:                       ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  br label %for.end, !dbg !21
+
+for.end:                                          ; preds = %entry, %for.cond.for.end_crit_edge
+  %sum.0.lcssa = phi i32 [ %add.lcssa, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  ret i32 %sum.0.lcssa, !dbg !26
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata) #1
+
+attributes #0 = { nounwind readonly ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!18}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 185038) (llvm/trunk 185097)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Data/backedup/dev/os/llvm/debug/-] [DW_LANG_C99]
+!1 = metadata !{metadata !"-", metadata !"/Volumes/Data/backedup/dev/os/llvm/debug"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"f", metadata !"f", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32*, i32)* @f, null, null, metadata !12, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f]
+!5 = metadata !{metadata !"<stdin>", metadata !"/Volumes/Data/backedup/dev/os/llvm/debug"}
+!6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [/Volumes/Data/backedup/dev/os/llvm/debug/<stdin>]
+!7 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9, metadata !10, metadata !11}
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!11 = metadata !{i32 786468, null, null, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned]
+!12 = metadata !{metadata !13, metadata !14, metadata !15, metadata !16}
+!13 = metadata !{i32 786689, metadata !4, metadata !"a", metadata !6, i32 16777219, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 3]
+!14 = metadata !{i32 786689, metadata !4, metadata !"size", metadata !6, i32 33554435, metadata !11, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [size] [line 3]
+!15 = metadata !{i32 786688, metadata !4, metadata !"sum", metadata !6, i32 4, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [sum] [line 4]
+!16 = metadata !{i32 786688, metadata !17, metadata !"i", metadata !6, i32 5, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 5]
+!17 = metadata !{i32 786443, metadata !5, metadata !4, i32 5, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Data/backedup/dev/os/llvm/debug/<stdin>]
+!18 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
+!19 = metadata !{i32 3, i32 0, metadata !4, null}
+!20 = metadata !{i32 4, i32 0, metadata !4, null}
+!21 = metadata !{i32 5, i32 0, metadata !17, null}
+!22 = metadata !{i32 6, i32 0, metadata !17, null}
+!23 = metadata !{metadata !"int", metadata !24}
+!24 = metadata !{metadata !"omnipotent char", metadata !25}
+!25 = metadata !{metadata !"Simple C/C++ TBAA"}
+!26 = metadata !{i32 7, i32 0, metadata !4, null}