[llvm] [VPlan] Set debug location for VPReduction/VPWidenPHI/VPWidenIntrinsicRecipe. (PR #120054)
Elvis Wang via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 16 04:34:58 PST 2024
https://github.com/ElvisWang123 updated https://github.com/llvm/llvm-project/pull/120054
>From 7508a6da543717be8fc0621b0522a86f43d1dd90 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Sun, 15 Dec 2024 19:45:13 -0800
Subject: [PATCH 1/6] Precommit test case.
---
.../LoopVectorize/RISCV/preserve-dbg-loc.ll | 36 +++++++++++++++++++
.../LoopVectorize/preserve-dbg-loc.ll | 33 +++++++++++++++++
2 files changed, 69 insertions(+)
create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll
create mode 100644 llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll b/llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll
new file mode 100644
index 00000000000000..5993dc26a0e8fe
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll
@@ -0,0 +1,36 @@
+; RUN: opt -passes=debugify,loop-vectorize \
+; RUN: -force-tail-folding-style=data-with-evl \
+; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
+; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -S < %s 2>&1 | FileCheck --check-prefix=DEBUGLOC %s
+
+; Testing the debug locations of the generated vector intstruction are same as
+; their scalar instruction.
+
+; DEBUGLOC-LABEL: define void @vp_select(
+define void @vp_select(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
+; DEBUGLOC: vector.body:
+; DEBUGLOC: %[[VPSel:[0-9]+]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %15, <vscale x 4 x i32> %vp.op.load1, <vscale x 4 x i32> %vp.op, i32 %9)
+; DEBUGLOC: for.body:
+; DEBUGLOC: %cond.p = select i1 %cmp4, i32 %22, i32 %23, !dbg !39
+ entry:
+ br label %for.body
+
+ for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %arrayidx3 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
+ %1 = load i32, ptr %arrayidx3, align 4
+ %cmp4 = icmp sgt i32 %0, %1
+ %2 = sub i32 0, %1
+ %cond.p = select i1 %cmp4, i32 %1, i32 %2
+ %cond = add i32 %cond.p, %0
+ %arrayidx15 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+ store i32 %cond, ptr %arrayidx15, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond.not, label %exit, label %for.body
+
+ exit:
+ ret void
+ }
diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll
new file mode 100644
index 00000000000000..02cac45f1c5cc5
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s -passes=debugify,loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -prefer-inloop-reductions -S | FileCheck %s -check-prefix DEBUGLOC
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Testing the debug locations of the generated vector intstruction are same as
+; their scalar instruction.
+
+; DEBUGLOC-LABEL: define i32 @reduction_sum(
+define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
+; DEBUGLOC: vector.body:
+; DEBUGLOC: %[[VecLoad:.*]] = load <4 x i32>, ptr %2, align 4, !dbg ![[LoadLoc0:[0-9]+]]
+; DEBUGLOC: %[[VecRed:.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %wide.load), !dbg ![[LoadLoc0]]
+; DEBUGLOC: .lr.ph:
+; DEBUGLOC: %l3 = load i32, ptr %l2, align 4, !dbg ![[LoadLoc0]]
+; DEBUGLOC: %l7 = add i32 %sum.02, %l3, !dbg ![[RedLoc0:[0-9]+]]
+entry:
+ br label %.lr.ph
+
+.lr.ph: ; preds = %entry, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
+ %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 0, %entry ]
+ %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+ %l3 = load i32, ptr %l2, align 4
+ %l6 = trunc i64 %indvars.iv to i32
+ %l7 = add i32 %sum.02, %l3
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 256
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph
+ %sum.0.lcssa = phi i32 [ %l7, %.lr.ph ]
+ ret i32 %sum.0.lcssa
+}
>From dc5c061dd936fad163944571b8fed81046de8b28 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Sun, 15 Dec 2024 21:45:32 -0800
Subject: [PATCH 2/6] [VPlan] Set debug loc for
VPReduction/VPWidenPHI/VPWidenIntrinsicRecipe.
This patch adds the missing debug location for
VPReduction/VPWidenIntrinsicRecipe and aligns the debug location of
VPWidenPHIRecipe with its scalar counterpart.
---
llvm/lib/Transforms/Vectorize/VPlan.h | 8 +++++---
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 ++
.../Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll | 4 ++--
llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll | 4 ++--
llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll | 4 ++--
5 files changed, 13 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 12208a7968338b..8750f1f8347d62 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1673,7 +1673,7 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID,
ArrayRef<VPValue *> CallArguments, Type *Ty,
DebugLoc DL = {})
- : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments),
+ : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, DL),
VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) {
LLVMContext &Ctx = Ty->getContext();
AttributeList Attrs = Intrinsic::getAttributes(Ctx, VectorIntrinsicID);
@@ -2313,7 +2313,8 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe {
public:
/// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
- : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi) {
+ : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi,
+ Phi->getDebugLoc()) {
if (Start)
addOperand(Start);
}
@@ -2615,7 +2616,8 @@ class VPReductionRecipe : public VPSingleDefRecipe {
VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
Instruction *I, ArrayRef<VPValue *> Operands,
VPValue *CondOp, bool IsOrdered)
- : VPSingleDefRecipe(SC, Operands, I), RdxDesc(R), IsOrdered(IsOrdered) {
+ : VPSingleDefRecipe(SC, Operands, I, I->getDebugLoc()), RdxDesc(R),
+ IsOrdered(IsOrdered) {
if (CondOp) {
IsConditional = true;
addOperand(CondOp);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index ab95b647f211b7..0580ff8b83632e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2114,6 +2114,7 @@ void VPReductionRecipe::execute(VPTransformState &State) {
// Propagate the fast-math flags carried by the underlying instruction.
IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
+ State.setDebugLocFrom(getDebugLoc());
Value *NewVecOp = State.get(getVecOp());
if (VPValue *Cond = getCondOp()) {
Value *NewCond = State.get(Cond, State.VF.isScalar());
@@ -3456,6 +3457,7 @@ void VPWidenPHIRecipe::execute(VPTransformState &State) {
assert(EnableVPlanNativePath &&
"Non-native vplans are not expected to have VPWidenPHIRecipes.");
+ State.setDebugLocFrom(getDebugLoc());
Value *Op0 = State.get(getOperand(0));
Type *VecTy = Op0->getType();
Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll b/llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll
index 5993dc26a0e8fe..9998c44e6e3907 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll
@@ -9,9 +9,9 @@
; DEBUGLOC-LABEL: define void @vp_select(
define void @vp_select(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; DEBUGLOC: vector.body:
-; DEBUGLOC: %[[VPSel:[0-9]+]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %15, <vscale x 4 x i32> %vp.op.load1, <vscale x 4 x i32> %vp.op, i32 %9)
+; DEBUGLOC: %[[VPSel:[0-9]+]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %15, <vscale x 4 x i32> %vp.op.load1, <vscale x 4 x i32> %vp.op, i32 %9), !dbg ![[SelLoc:[0-9]+]]
; DEBUGLOC: for.body:
-; DEBUGLOC: %cond.p = select i1 %cmp4, i32 %22, i32 %23, !dbg !39
+; DEBUGLOC: %cond.p = select i1 %cmp4, i32 %22, i32 %23, !dbg ![[SelLoc]]
entry:
br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
index 66aceab9fb27c8..44afa34100c299 100644
--- a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
+++ b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
@@ -15,8 +15,8 @@ define void @foo(ptr %h) !dbg !4 {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND_CLEANUP32:%.*]] ]
; CHECK-NEXT: br label [[FOR_COND5_PREHEADER1:%.*]], !dbg [[DBG21]]
; CHECK: for.cond5.preheader1:
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ], !dbg [[DBG21]]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]], !dbg [[DBG21]]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]]
; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> zeroinitializer, <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22:![0-9]+]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, <4 x ptr> [[TMP0]], i64 1, !dbg [[DBG22]]
; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> splat (i32 1), <4 x ptr> [[TMP1]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22]]
diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll
index 02cac45f1c5cc5..12e8b92523d31f 100644
--- a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll
+++ b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll
@@ -8,10 +8,10 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
; DEBUGLOC: vector.body:
; DEBUGLOC: %[[VecLoad:.*]] = load <4 x i32>, ptr %2, align 4, !dbg ![[LoadLoc0:[0-9]+]]
-; DEBUGLOC: %[[VecRed:.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %wide.load), !dbg ![[LoadLoc0]]
+; DEBUGLOC: %[[VecRed:.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %wide.load), !dbg ![[RedLoc0:[0-9]+]]
; DEBUGLOC: .lr.ph:
; DEBUGLOC: %l3 = load i32, ptr %l2, align 4, !dbg ![[LoadLoc0]]
-; DEBUGLOC: %l7 = add i32 %sum.02, %l3, !dbg ![[RedLoc0:[0-9]+]]
+; DEBUGLOC: %l7 = add i32 %sum.02, %l3, !dbg ![[RedLoc0]]
entry:
br label %.lr.ph
>From acabfbab6197f2790d7f31eeab98686ed63ce0d8 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Mon, 16 Dec 2024 02:13:01 -0800
Subject: [PATCH 3/6] !fixup, Get debugloc directly.
---
.../lib/Transforms/Vectorize/LoopVectorize.cpp | 6 +++---
llvm/lib/Transforms/Vectorize/VPlan.h | 18 +++++++++---------
.../Transforms/Vectorize/VPlanHCFGBuilder.cpp | 2 +-
3 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 61f7bd84902815..967aa4048f80f1 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9575,9 +9575,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
if (CM.blockNeedsPredicationForAnyReason(BB))
CondOp = RecipeBuilder.getBlockInMask(BB);
- VPReductionRecipe *RedRecipe =
- new VPReductionRecipe(RdxDesc, CurrentLinkI, PreviousLink, VecOp,
- CondOp, CM.useOrderedReductions(RdxDesc));
+ VPReductionRecipe *RedRecipe = new VPReductionRecipe(
+ RdxDesc, CurrentLinkI, PreviousLink, VecOp, CondOp,
+ CM.useOrderedReductions(RdxDesc), CurrentLinkI->getDebugLoc());
// Append the recipe to the end of the VPBasicBlock because we need to
// ensure that it comes after all of it's inputs, including CondOp.
// Note that this transformation may leave over dead recipes (including
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 8750f1f8347d62..d4ee33f815f57a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2312,9 +2312,8 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe {
public:
/// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
- VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
- : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi,
- Phi->getDebugLoc()) {
+ VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr, DebugLoc DL = {})
+ : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi, DL) {
if (Start)
addOperand(Start);
}
@@ -2615,8 +2614,8 @@ class VPReductionRecipe : public VPSingleDefRecipe {
protected:
VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
Instruction *I, ArrayRef<VPValue *> Operands,
- VPValue *CondOp, bool IsOrdered)
- : VPSingleDefRecipe(SC, Operands, I, I->getDebugLoc()), RdxDesc(R),
+ VPValue *CondOp, bool IsOrdered, DebugLoc DL)
+ : VPSingleDefRecipe(SC, Operands, I, DL), RdxDesc(R),
IsOrdered(IsOrdered) {
if (CondOp) {
IsConditional = true;
@@ -2627,16 +2626,17 @@ class VPReductionRecipe : public VPSingleDefRecipe {
public:
VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I,
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
- bool IsOrdered)
+ bool IsOrdered, DebugLoc DL = {})
: VPReductionRecipe(VPDef::VPReductionSC, R, I,
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
- IsOrdered) {}
+ IsOrdered, DL) {}
~VPReductionRecipe() override = default;
VPReductionRecipe *clone() override {
return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
- getVecOp(), getCondOp(), IsOrdered);
+ getVecOp(), getCondOp(), IsOrdered,
+ getDebugLoc());
}
static inline bool classof(const VPRecipeBase *R) {
@@ -2691,7 +2691,7 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
VPDef::VPReductionEVLSC, R.getRecurrenceDescriptor(),
cast_or_null<Instruction>(R.getUnderlyingValue()),
ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
- R.isOrdered()) {}
+ R.isOrdered(), R.getDebugLoc()) {}
~VPReductionEVLRecipe() override = default;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
index 6e633739fcc3dd..140cea3c700d87 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
@@ -308,7 +308,7 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
// Phi node's operands may have not been visited at this point. We create
// an empty VPInstruction that we will fix once the whole plain CFG has
// been built.
- NewVPV = new VPWidenPHIRecipe(Phi);
+ NewVPV = new VPWidenPHIRecipe(Phi, nullptr, Phi->getDebugLoc());
VPBB->appendRecipe(cast<VPWidenPHIRecipe>(NewVPV));
PhisToFix.push_back(Phi);
} else {
>From c8a220c6285fda048913707468311239f5f58846 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Mon, 16 Dec 2024 02:34:45 -0800
Subject: [PATCH 4/6] Fixup! Update test cases.
Rename preserve-dbg-loc.ll.
Add a test case for VPWidenIntrinsicRecipe for the default target in
preserve-dbg-loc-and-loop-metadata.ll
---
.../LoopVectorize/RISCV/preserve-dbg-loc.ll | 9 ++---
.../preserve-dbg-loc-and-loop-metadata.ll | 33 +++++++++++++++++--
.../preserve-dbg-loc-reduction-inloop.ll | 31 +++++++++++++++++
.../LoopVectorize/preserve-dbg-loc.ll | 33 -------------------
4 files changed, 66 insertions(+), 40 deletions(-)
create mode 100644 llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll
delete mode 100644 llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll b/llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll
index 9998c44e6e3907..8af04e0e43ed10 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/preserve-dbg-loc.ll
@@ -4,14 +4,15 @@
; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -S < %s 2>&1 | FileCheck --check-prefix=DEBUGLOC %s
; Testing the debug locations of the generated vector intstruction are same as
-; their scalar instruction.
+; their scalar counterpart.
+define void @vp_select(ptr %a, ptr %b, ptr %c, i64 %N) {
; DEBUGLOC-LABEL: define void @vp_select(
-define void @vp_select(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; DEBUGLOC: vector.body:
-; DEBUGLOC: %[[VPSel:[0-9]+]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %15, <vscale x 4 x i32> %vp.op.load1, <vscale x 4 x i32> %vp.op, i32 %9), !dbg ![[SelLoc:[0-9]+]]
+; DEBUGLOC: = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %{{.+}}, <vscale x 4 x i32> %{{.+}}, <vscale x 4 x i32> %{{.+}}, i32 %{{.+}}), !dbg ![[SelLoc:[0-9]+]]
; DEBUGLOC: for.body:
-; DEBUGLOC: %cond.p = select i1 %cmp4, i32 %22, i32 %23, !dbg ![[SelLoc]]
+; DEBUGLOC: %cond.p = select i1 %cmp4, i32 %{{.*}}, i32 %{{.*}}, !dbg ![[SelLoc]]
+;
entry:
br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll
index 2eec61b859f13e..ddc227f0be580d 100644
--- a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll
+++ b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll
@@ -1,6 +1,6 @@
-; RUN: opt < %s -passes=loop-vectorize -force-vector-width=4 -S 2>&1 | FileCheck %s
-; RUN: opt < %s -passes=debugify,loop-vectorize -force-vector-width=4 -S | FileCheck %s -check-prefix DEBUGLOC
-; RUN: opt < %s -passes=debugify,loop-vectorize -force-vector-width=4 -S --try-experimental-debuginfo-iterators | FileCheck %s -check-prefix DEBUGLOC
+; RUN: opt < %s -passes=loop-vectorize -S 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=debugify,loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s -check-prefix DEBUGLOC
+; RUN: opt < %s -passes=debugify,loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S --try-experimental-debuginfo-iterators | FileCheck %s -check-prefix DEBUGLOC
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; This test makes sure we don't duplicate the loop vectorizer's metadata
@@ -54,6 +54,32 @@ exit:
ret void
}
+define void @widen_intrinsic_dbg(i64 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
+; DEBUGLOC-LABEL: define void @widen_intrinsic_dbg(
+;
+; DEBUGLOC: vector.body:
+; DEBUGLOC: = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}), !dbg ![[INTRINSIC_LOC:[0-9]+]]
+; DEBUGLOC: for.body:
+; DEBUGLOC: %call = tail call float @llvm.sqrt.f32(float %lv) #2, !dbg ![[INTRINSIC_LOC]]
+;
+entry:
+ %cmp6 = icmp sgt i64 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body:
+ %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, ptr %y, i64 %iv
+ %lv = load float, ptr %arrayidx, align 4
+ %call = tail call float @llvm.sqrt.f32(float %lv) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %iv
+ store float %call, ptr %arrayidx2, align 4
+ %iv.next = add i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
!0 = !{!0, !1}
!1 = !{!"llvm.loop.vectorize.width", i32 4}
@@ -62,3 +88,4 @@ exit:
; DEBUGLOC: ![[RESUMELOC]] = !DILocation(line: 2
; DEBUGLOC: ![[PTRIVLOC]] = !DILocation(line: 12
+; DEBUGLOC: ![[INTRINSIC_LOC]] = !DILocation(line: 23
diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll
new file mode 100644
index 00000000000000..8800928f5b87b5
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -passes=debugify,loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -prefer-inloop-reductions -S | FileCheck %s -check-prefix DEBUGLOC
+
+; Testing the debug locations of the generated vector intstruction are same as
+; their scalar counterpart.
+
+; DEBUGLOC-LABEL: define i32 @reduction_sum(
+define i32 @reduction_sum(ptr %A, ptr %B) {
+; DEBUGLOC: vector.body:
+; DEBUGLOC: %[[VecLoad:.*]] = load <4 x i32>, ptr %2, align 4, !dbg ![[LoadLoc0:[0-9]+]]
+; DEBUGLOC: %[[VecRed:.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %wide.load), !dbg ![[RedLoc0:[0-9]+]]
+; DEBUGLOC: loop:
+; DEBUGLOC: %l3 = load i32, ptr %l2, align 4, !dbg ![[LoadLoc0]]
+; DEBUGLOC: %l7 = add i32 %sum.02, %l3, !dbg ![[RedLoc0]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %sum.02 = phi i32 [ 0, %entry ], [ %l7, %loop ]
+ %l2 = getelementptr inbounds i32, ptr %A, i64 %iv
+ %l3 = load i32, ptr %l2, align 4
+ %l7 = add i32 %sum.02, %l3
+ %iv.next = add i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 256
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ %sum.0.lcssa = phi i32 [ %l7, %loop ]
+ ret i32 %sum.0.lcssa
+}
diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll
deleted file mode 100644
index 12e8b92523d31f..00000000000000
--- a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; RUN: opt < %s -passes=debugify,loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -prefer-inloop-reductions -S | FileCheck %s -check-prefix DEBUGLOC
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-; Testing the debug locations of the generated vector intstruction are same as
-; their scalar instruction.
-
-; DEBUGLOC-LABEL: define i32 @reduction_sum(
-define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
-; DEBUGLOC: vector.body:
-; DEBUGLOC: %[[VecLoad:.*]] = load <4 x i32>, ptr %2, align 4, !dbg ![[LoadLoc0:[0-9]+]]
-; DEBUGLOC: %[[VecRed:.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %wide.load), !dbg ![[RedLoc0:[0-9]+]]
-; DEBUGLOC: .lr.ph:
-; DEBUGLOC: %l3 = load i32, ptr %l2, align 4, !dbg ![[LoadLoc0]]
-; DEBUGLOC: %l7 = add i32 %sum.02, %l3, !dbg ![[RedLoc0]]
-entry:
- br label %.lr.ph
-
-.lr.ph: ; preds = %entry, %.lr.ph
- %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
- %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 0, %entry ]
- %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
- %l3 = load i32, ptr %l2, align 4
- %l6 = trunc i64 %indvars.iv to i32
- %l7 = add i32 %sum.02, %l3
- %indvars.iv.next = add i64 %indvars.iv, 1
- %lftr.wideiv = trunc i64 %indvars.iv.next to i32
- %exitcond = icmp eq i32 %lftr.wideiv, 256
- br i1 %exitcond, label %._crit_edge, label %.lr.ph
-
-._crit_edge: ; preds = %.lr.ph
- %sum.0.lcssa = phi i32 [ %l7, %.lr.ph ]
- ret i32 %sum.0.lcssa
-}
>From 24077c711b2d9f17ce5ecb0f4b7f8c397b7d68e9 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Mon, 16 Dec 2024 04:05:40 -0800
Subject: [PATCH 5/6] !fixup address comments and rebase
---
.../LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll | 9 ++++-----
.../LoopVectorize/preserve-dbg-loc-reduction-inloop.ll | 4 ++--
2 files changed, 6 insertions(+), 7 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll
index ddc227f0be580d..567a5f8d6143fb 100644
--- a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll
+++ b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll
@@ -1,6 +1,6 @@
-; RUN: opt < %s -passes=loop-vectorize -S 2>&1 | FileCheck %s
-; RUN: opt < %s -passes=debugify,loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s -check-prefix DEBUGLOC
-; RUN: opt < %s -passes=debugify,loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S --try-experimental-debuginfo-iterators | FileCheck %s -check-prefix DEBUGLOC
+; RUN: opt < %s -passes=loop-vectorize -force-vector-width=4 -S 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=debugify,loop-vectorize -force-vector-width=4 -S | FileCheck %s -check-prefix DEBUGLOC
+; RUN: opt < %s -passes=debugify,loop-vectorize -force-vector-width=4 -S --try-experimental-debuginfo-iterators | FileCheck %s -check-prefix DEBUGLOC
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; This test makes sure we don't duplicate the loop vectorizer's metadata
@@ -54,9 +54,8 @@ exit:
ret void
}
-define void @widen_intrinsic_dbg(i64 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
+define void @widen_intrinsic_dbg(i64 %n, ptr %y, ptr %x) nounwind uwtable {
; DEBUGLOC-LABEL: define void @widen_intrinsic_dbg(
-;
; DEBUGLOC: vector.body:
; DEBUGLOC: = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}), !dbg ![[INTRINSIC_LOC:[0-9]+]]
; DEBUGLOC: for.body:
diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll
index 8800928f5b87b5..76457cf599c9a5 100644
--- a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll
@@ -1,10 +1,10 @@
-; RUN: opt < %s -passes=debugify,loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -prefer-inloop-reductions -S | FileCheck %s -check-prefix DEBUGLOC
+; RUN: opt < %s -passes=debugify,loop-vectorize -force-vector-width=4 -prefer-inloop-reductions -S | FileCheck %s -check-prefix DEBUGLOC
; Testing the debug locations of the generated vector intstruction are same as
; their scalar counterpart.
-; DEBUGLOC-LABEL: define i32 @reduction_sum(
define i32 @reduction_sum(ptr %A, ptr %B) {
+; DEBUGLOC-LABEL: define i32 @reduction_sum(
; DEBUGLOC: vector.body:
; DEBUGLOC: %[[VecLoad:.*]] = load <4 x i32>, ptr %2, align 4, !dbg ![[LoadLoc0:[0-9]+]]
; DEBUGLOC: %[[VecRed:.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %wide.load), !dbg ![[RedLoc0:[0-9]+]]
>From 7d13a09b4b4ae4d11c995d34c64e1fc1838f27e3 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Mon, 16 Dec 2024 04:24:38 -0800
Subject: [PATCH 6/6] !fixup use `auto` and update naming in testcase.
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +-
.../preserve-dbg-loc-reduction-inloop.ll | 11 +++++++----
2 files changed, 8 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 967aa4048f80f1..12e340e7fdf848 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9575,7 +9575,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
if (CM.blockNeedsPredicationForAnyReason(BB))
CondOp = RecipeBuilder.getBlockInMask(BB);
- VPReductionRecipe *RedRecipe = new VPReductionRecipe(
+ auto *RedRecipe = new VPReductionRecipe(
RdxDesc, CurrentLinkI, PreviousLink, VecOp, CondOp,
CM.useOrderedReductions(RdxDesc), CurrentLinkI->getDebugLoc());
// Append the recipe to the end of the VPBasicBlock because we need to
diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll
index 76457cf599c9a5..3048f698d94ab1 100644
--- a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll
@@ -6,11 +6,11 @@
define i32 @reduction_sum(ptr %A, ptr %B) {
; DEBUGLOC-LABEL: define i32 @reduction_sum(
; DEBUGLOC: vector.body:
-; DEBUGLOC: %[[VecLoad:.*]] = load <4 x i32>, ptr %2, align 4, !dbg ![[LoadLoc0:[0-9]+]]
-; DEBUGLOC: %[[VecRed:.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %wide.load), !dbg ![[RedLoc0:[0-9]+]]
+; DEBUGLOC: = load <4 x i32>, ptr %2, align 4, !dbg ![[LOADLOC:[0-9]+]]
+; DEBUGLOC: = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %{{.+}}), !dbg ![[REDLOC:[0-9]+]]
; DEBUGLOC: loop:
-; DEBUGLOC: %l3 = load i32, ptr %l2, align 4, !dbg ![[LoadLoc0]]
-; DEBUGLOC: %l7 = add i32 %sum.02, %l3, !dbg ![[RedLoc0]]
+; DEBUGLOC: %l3 = load i32, ptr %l2, align 4, !dbg ![[LOADLOC]]
+; DEBUGLOC: %l7 = add i32 %sum.02, %l3, !dbg ![[REDLOC]]
;
entry:
br label %loop
@@ -29,3 +29,6 @@ exit:
%sum.0.lcssa = phi i32 [ %l7, %loop ]
ret i32 %sum.0.lcssa
}
+
+; DEBUGLOC: ![[LOADLOC]] = !DILocation(line: 5
+; DEBUGLOC: ![[REDLOC]] = !DILocation(line: 6
More information about the llvm-commits
mailing list