[llvm] [VPlan] Set debug location for VPReduction/VPWidenPHI/VPWidenIntrinsicRecipe. (PR #120054)

Elvis Wang via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 16 03:19:58 PST 2024


https://github.com/ElvisWang123 updated https://github.com/llvm/llvm-project/pull/120054

>From a7d91131c2e56651f4b9546bfc10287fdee415ce Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Sun, 15 Dec 2024 19:45:13 -0800
Subject: [PATCH 1/4] Precommit test case.

---
 .../LoopVectorize/RISCV/preserve-dbg-loc.ll   | 36 +++++++++++++++++++
 .../LoopVectorize/preserve-dbg-loc.ll         | 33 +++++++++++++++++
 2 files changed, 69 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll
 create mode 100644 llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll

diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll b/llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll
new file mode 100644
index 00000000000000..5993dc26a0e8fe
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll
@@ -0,0 +1,36 @@
+; RUN: opt -passes=debugify,loop-vectorize \
+; RUN: -force-tail-folding-style=data-with-evl \
+; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
+; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -S < %s 2>&1 | FileCheck --check-prefix=DEBUGLOC %s
+
+; Test that the debug locations of the generated vector instructions are the
+; same as those of their scalar counterparts.
+
+; DEBUGLOC-LABEL: define void @vp_select(
+define void @vp_select(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
+; DEBUGLOC: vector.body:
+; DEBUGLOC:   %[[VPSel:[0-9]+]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %15, <vscale x 4 x i32> %vp.op.load1, <vscale x 4 x i32> %vp.op, i32 %9)
+; DEBUGLOC: for.body:
+; DEBUGLOC:   %cond.p = select i1 %cmp4, i32 %22, i32 %23, !dbg !39
+ entry:
+   br label %for.body
+
+ for.body:
+   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+   %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+   %0 = load i32, ptr %arrayidx, align 4
+   %arrayidx3 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
+   %1 = load i32, ptr %arrayidx3, align 4
+   %cmp4 = icmp sgt i32 %0, %1
+   %2 = sub i32 0, %1
+   %cond.p = select i1 %cmp4, i32 %1, i32 %2
+   %cond = add i32 %cond.p, %0
+   %arrayidx15 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+   store i32 %cond, ptr %arrayidx15, align 4
+   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+   %exitcond.not = icmp eq i64 %indvars.iv.next, %N
+   br i1 %exitcond.not, label %exit, label %for.body
+
+ exit:
+   ret void
+ }
diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll
new file mode 100644
index 00000000000000..02cac45f1c5cc5
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s -passes=debugify,loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -prefer-inloop-reductions -S | FileCheck %s -check-prefix DEBUGLOC
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Test that the debug locations of the generated vector instructions are the
+; same as those of their scalar counterparts.
+
+; DEBUGLOC-LABEL: define i32 @reduction_sum(
+define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
+; DEBUGLOC: vector.body:
+; DEBUGLOC:   %[[VecLoad:.*]] = load <4 x i32>, ptr %2, align 4, !dbg ![[LoadLoc0:[0-9]+]]
+; DEBUGLOC:   %[[VecRed:.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %wide.load), !dbg ![[LoadLoc0]]
+; DEBUGLOC: .lr.ph:
+; DEBUGLOC:   %l3 = load i32, ptr %l2, align 4, !dbg ![[LoadLoc0]]
+; DEBUGLOC:   %l7 = add i32 %sum.02, %l3, !dbg ![[RedLoc0:[0-9]+]]
+entry:
+  br label %.lr.ph
+
+.lr.ph:                                           ; preds = %entry, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 0, %entry ]
+  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %l3 = load i32, ptr %l2, align 4
+  %l6 = trunc i64 %indvars.iv to i32
+  %l7 = add i32 %sum.02, %l3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 256
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph
+  %sum.0.lcssa = phi i32 [ %l7, %.lr.ph ]
+  ret i32 %sum.0.lcssa
+}

>From eb41b83c8799e68a1b79699d9511b5c64b1b6991 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Sun, 15 Dec 2024 21:45:32 -0800
Subject: [PATCH 2/4] [VPlan] Set debug loc for
 VPReduction/VPWidenPHI/VPWidenIntrinsicRecipe.

This patch adds the missing debug locations for
VPReduction/VPWidenIntrinsicRecipe and aligns the debug location of
VPWidenPHIRecipe with that of its scalar counterpart.
---
 llvm/lib/Transforms/Vectorize/VPlan.h                     | 8 +++++---
 llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp            | 2 ++
 .../Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll    | 4 ++--
 llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll | 4 ++--
 llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll    | 4 ++--
 5 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 8794517b777f3b..6ebf2ec36e3789 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1662,7 +1662,7 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
   VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID,
                          ArrayRef<VPValue *> CallArguments, Type *Ty,
                          DebugLoc DL = {})
-      : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments),
+      : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, DL),
         VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) {
     LLVMContext &Ctx = Ty->getContext();
     AttributeList Attrs = Intrinsic::getAttributes(Ctx, VectorIntrinsicID);
@@ -2288,7 +2288,8 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe {
 public:
   /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
   VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
-      : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi) {
+      : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi,
+                          Phi->getDebugLoc()) {
     if (Start)
       addOperand(Start);
   }
@@ -2590,7 +2591,8 @@ class VPReductionRecipe : public VPSingleDefRecipe {
   VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
                     Instruction *I, ArrayRef<VPValue *> Operands,
                     VPValue *CondOp, bool IsOrdered)
-      : VPSingleDefRecipe(SC, Operands, I), RdxDesc(R), IsOrdered(IsOrdered) {
+      : VPSingleDefRecipe(SC, Operands, I, I->getDebugLoc()), RdxDesc(R),
+        IsOrdered(IsOrdered) {
     if (CondOp) {
       IsConditional = true;
       addOperand(CondOp);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index e882368544e815..4179a9eebab3b7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2108,6 +2108,7 @@ void VPReductionRecipe::execute(VPTransformState &State) {
   // Propagate the fast-math flags carried by the underlying instruction.
   IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
   State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
+  State.setDebugLocFrom(getDebugLoc());
   Value *NewVecOp = State.get(getVecOp());
   if (VPValue *Cond = getCondOp()) {
     Value *NewCond = State.get(Cond, State.VF.isScalar());
@@ -3424,6 +3425,7 @@ void VPWidenPHIRecipe::execute(VPTransformState &State) {
   assert(EnableVPlanNativePath &&
          "Non-native vplans are not expected to have VPWidenPHIRecipes.");
 
+  State.setDebugLocFrom(getDebugLoc());
   Value *Op0 = State.get(getOperand(0));
   Type *VecTy = Op0->getType();
   Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll b/llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll
index 5993dc26a0e8fe..9998c44e6e3907 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll
@@ -9,9 +9,9 @@
 ; DEBUGLOC-LABEL: define void @vp_select(
 define void @vp_select(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
 ; DEBUGLOC: vector.body:
-; DEBUGLOC:   %[[VPSel:[0-9]+]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %15, <vscale x 4 x i32> %vp.op.load1, <vscale x 4 x i32> %vp.op, i32 %9)
+; DEBUGLOC:   %[[VPSel:[0-9]+]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %15, <vscale x 4 x i32> %vp.op.load1, <vscale x 4 x i32> %vp.op, i32 %9), !dbg ![[SelLoc:[0-9]+]]
 ; DEBUGLOC: for.body:
-; DEBUGLOC:   %cond.p = select i1 %cmp4, i32 %22, i32 %23, !dbg !39
+; DEBUGLOC:   %cond.p = select i1 %cmp4, i32 %22, i32 %23, !dbg ![[SelLoc]]
  entry:
    br label %for.body
 
diff --git a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
index 66aceab9fb27c8..44afa34100c299 100644
--- a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
+++ b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
@@ -15,8 +15,8 @@ define void @foo(ptr %h) !dbg !4 {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND_CLEANUP32:%.*]] ]
 ; CHECK-NEXT:    br label [[FOR_COND5_PREHEADER1:%.*]], !dbg [[DBG21]]
 ; CHECK:       for.cond5.preheader1:
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ], !dbg [[DBG21]]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]], !dbg [[DBG21]]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]]
 ; CHECK-NEXT:    call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> zeroinitializer, <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22:![0-9]+]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, <4 x ptr> [[TMP0]], i64 1, !dbg [[DBG22]]
 ; CHECK-NEXT:    call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> splat (i32 1), <4 x ptr> [[TMP1]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22]]
diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll
index 02cac45f1c5cc5..12e8b92523d31f 100644
--- a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll
+++ b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll
@@ -8,10 +8,10 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; DEBUGLOC: vector.body:
 ; DEBUGLOC:   %[[VecLoad:.*]] = load <4 x i32>, ptr %2, align 4, !dbg ![[LoadLoc0:[0-9]+]]
-; DEBUGLOC:   %[[VecRed:.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %wide.load), !dbg ![[LoadLoc0]]
+; DEBUGLOC:   %[[VecRed:.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %wide.load), !dbg ![[RedLoc0:[0-9]+]]
 ; DEBUGLOC: .lr.ph:
 ; DEBUGLOC:   %l3 = load i32, ptr %l2, align 4, !dbg ![[LoadLoc0]]
-; DEBUGLOC:   %l7 = add i32 %sum.02, %l3, !dbg ![[RedLoc0:[0-9]+]]
+; DEBUGLOC:   %l7 = add i32 %sum.02, %l3, !dbg ![[RedLoc0]]
 entry:
   br label %.lr.ph
 

>From 22776dd8dd61ed7ed93bb1f4f38880519a3ed6be Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Mon, 16 Dec 2024 02:13:01 -0800
Subject: [PATCH 3/4] !fixup, Get debugloc directly.

---
 .../lib/Transforms/Vectorize/LoopVectorize.cpp |  6 +++---
 llvm/lib/Transforms/Vectorize/VPlan.h          | 18 +++++++++---------
 .../Transforms/Vectorize/VPlanHCFGBuilder.cpp  |  2 +-
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index de164ee434d647..c5a16d54625612 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9518,9 +9518,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
       if (CM.blockNeedsPredicationForAnyReason(BB))
         CondOp = RecipeBuilder.getBlockInMask(BB);
 
-      VPReductionRecipe *RedRecipe =
-          new VPReductionRecipe(RdxDesc, CurrentLinkI, PreviousLink, VecOp,
-                                CondOp, CM.useOrderedReductions(RdxDesc));
+      VPReductionRecipe *RedRecipe = new VPReductionRecipe(
+          RdxDesc, CurrentLinkI, PreviousLink, VecOp, CondOp,
+          CM.useOrderedReductions(RdxDesc), CurrentLinkI->getDebugLoc());
       // Append the recipe to the end of the VPBasicBlock because we need to
       // ensure that it comes after all of it's inputs, including CondOp.
       // Note that this transformation may leave over dead recipes (including
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 6ebf2ec36e3789..f7a4830e928cd9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2287,9 +2287,8 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe {
 
 public:
   /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
-  VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
-      : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi,
-                          Phi->getDebugLoc()) {
+  VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr, DebugLoc DL = {})
+      : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi, DL) {
     if (Start)
       addOperand(Start);
   }
@@ -2590,8 +2589,8 @@ class VPReductionRecipe : public VPSingleDefRecipe {
 protected:
   VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
                     Instruction *I, ArrayRef<VPValue *> Operands,
-                    VPValue *CondOp, bool IsOrdered)
-      : VPSingleDefRecipe(SC, Operands, I, I->getDebugLoc()), RdxDesc(R),
+                    VPValue *CondOp, bool IsOrdered, DebugLoc DL)
+      : VPSingleDefRecipe(SC, Operands, I, DL), RdxDesc(R),
         IsOrdered(IsOrdered) {
     if (CondOp) {
       IsConditional = true;
@@ -2602,16 +2601,17 @@ class VPReductionRecipe : public VPSingleDefRecipe {
 public:
   VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I,
                     VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
-                    bool IsOrdered)
+                    bool IsOrdered, DebugLoc DL = {})
       : VPReductionRecipe(VPDef::VPReductionSC, R, I,
                           ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
-                          IsOrdered) {}
+                          IsOrdered, DL) {}
 
   ~VPReductionRecipe() override = default;
 
   VPReductionRecipe *clone() override {
     return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
-                                 getVecOp(), getCondOp(), IsOrdered);
+                                 getVecOp(), getCondOp(), IsOrdered,
+                                 getDebugLoc());
   }
 
   static inline bool classof(const VPRecipeBase *R) {
@@ -2666,7 +2666,7 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
             VPDef::VPReductionEVLSC, R.getRecurrenceDescriptor(),
             cast_or_null<Instruction>(R.getUnderlyingValue()),
             ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
-            R.isOrdered()) {}
+            R.isOrdered(), R.getDebugLoc()) {}
 
   ~VPReductionEVLRecipe() override = default;
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
index 6e633739fcc3dd..140cea3c700d87 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
@@ -308,7 +308,7 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
       // Phi node's operands may have not been visited at this point. We create
       // an empty VPInstruction that we will fix once the whole plain CFG has
       // been built.
-      NewVPV = new VPWidenPHIRecipe(Phi);
+      NewVPV = new VPWidenPHIRecipe(Phi, nullptr, Phi->getDebugLoc());
       VPBB->appendRecipe(cast<VPWidenPHIRecipe>(NewVPV));
       PhisToFix.push_back(Phi);
     } else {

>From 0296fc3ffacb507ba650320c9d1101b087cb5604 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Mon, 16 Dec 2024 02:34:45 -0800
Subject: [PATCH 4/4] Fixup! Update test cases.

Rename preserve-dbg-loc.ll to preserve-dbg-loc-reduction-inloop.ll.
Add a test case for VPWidenIntrinsicRecipe for the default target in
preserve-dbg-loc-and-loop-metadata.ll.
---
 .../preserve-dbg-loc-and-loop-metadata.ll     | 35 ++++++++++++++++---
 ...l => preserve-dbg-loc-reduction-inloop.ll} | 24 ++++++-------
 2 files changed, 42 insertions(+), 17 deletions(-)
 rename llvm/test/Transforms/LoopVectorize/{preserve-dbg-loc.ll => preserve-dbg-loc-reduction-inloop.ll} (59%)

diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll
index 9cb9a2e93d0a9e..848c321eae7d54 100644
--- a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll
+++ b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -passes=loop-vectorize -S 2>&1 | FileCheck %s
-; RUN: opt < %s -passes=debugify,loop-vectorize -S | FileCheck %s -check-prefix DEBUGLOC
-; RUN: opt < %s -passes=debugify,loop-vectorize -S --try-experimental-debuginfo-iterators | FileCheck %s -check-prefix DEBUGLOC
+; RUN: opt < %s -passes=debugify,loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s -check-prefix DEBUGLOC
+; RUN: opt < %s -passes=debugify,loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S --try-experimental-debuginfo-iterators | FileCheck %s -check-prefix DEBUGLOC
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 ; This test makes sure we don't duplicate the loop vectorizer's metadata
@@ -26,14 +26,41 @@ for.body:                                         ; preds = %for.body, %entry
 
 ; DEBUGLOC: scalar.ph:
 ; DEBUGLOC-NEXT:    %bc.resume.val = phi {{.*}} !dbg ![[DbgLoc:[0-9]+]]
-;
-; DEBUGLOC: ![[DbgLoc]] = !DILocation(line: 2
 
 for.end:                                          ; preds = %for.body
   ret void
 }
 
+
+; Test that the debug loc of the intrinsic call is the same as that of its scalar counterpart.
+
+; DEBUGLOC-LABEL: define void @test_widen_intrinsic_recipe(
+define void @test_widen_intrinsic_recipe(i64 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
+
+; DEBUGLOC: %[[TMP1:.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %[[TMP0:.*]]), !dbg ![[INTRINSIC_LOC:[0-9]+]]
+; DEBUGLOC: %call = tail call float @llvm.sqrt.f32(float %lv) #2, !dbg ![[INTRINSIC_LOC]]
+
+entry:
+  %cmp6 = icmp sgt i64 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %iv
+  %lv = load float, ptr %arrayidx, align 4
+  %call = tail call float @llvm.sqrt.f32(float %lv) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %iv
+  store float %call, ptr %arrayidx2, align 4
+  %iv.next = add i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
 !0 = !{!0, !1}
 !1 = !{!"llvm.loop.vectorize.width", i32 4}
+; DEBUGLOC: ![[DbgLoc]] = !DILocation(line: 2
 ; CHECK-NOT: !{metadata !"llvm.loop.vectorize.width", i32 4}
 ; CHECK: !{!"llvm.loop.isvectorized", i32 1}
diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll
similarity index 59%
rename from llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll
rename to llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll
index 12e8b92523d31f..dc77d89da6cf89 100644
--- a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll
+++ b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll
@@ -9,25 +9,23 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; DEBUGLOC: vector.body:
 ; DEBUGLOC:   %[[VecLoad:.*]] = load <4 x i32>, ptr %2, align 4, !dbg ![[LoadLoc0:[0-9]+]]
 ; DEBUGLOC:   %[[VecRed:.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %wide.load), !dbg ![[RedLoc0:[0-9]+]]
-; DEBUGLOC: .lr.ph:
+; DEBUGLOC: loop:
 ; DEBUGLOC:   %l3 = load i32, ptr %l2, align 4, !dbg ![[LoadLoc0]]
 ; DEBUGLOC:   %l7 = add i32 %sum.02, %l3, !dbg ![[RedLoc0]]
 entry:
-  br label %.lr.ph
+  br label %loop
 
-.lr.ph:                                           ; preds = %entry, %.lr.ph
-  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
-  %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 0, %entry ]
-  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+loop:
+  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %l7, %loop ], [ 0, %entry ]
+  %l2 = getelementptr inbounds i32, ptr %A, i64 %iv
   %l3 = load i32, ptr %l2, align 4
-  %l6 = trunc i64 %indvars.iv to i32
   %l7 = add i32 %sum.02, %l3
-  %indvars.iv.next = add i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp eq i32 %lftr.wideiv, 256
-  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+  %iv.next = add i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 256
+  br i1 %exitcond, label %exit, label %loop
 
-._crit_edge:                                      ; preds = %.lr.ph
-  %sum.0.lcssa = phi i32 [ %l7, %.lr.ph ]
+exit:
+  %sum.0.lcssa = phi i32 [ %l7, %loop ]
   ret i32 %sum.0.lcssa
 }



More information about the llvm-commits mailing list