[llvm] [VPlan] Move FOR splice cost into VPInstruction::FirstOrderRecurrenceSplice (PR #129645)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 13 05:10:53 PDT 2025
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/129645
>From a3d6cf46445992a20affc8acb686bb49ca861922 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 4 Mar 2025 12:46:40 +0800
Subject: [PATCH 1/4] Precommit test
---
.../RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll
index 0bcfe13832ae7..eeb7a525f72d1 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll
@@ -51,7 +51,8 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: EMIT vp<[[RESUME_EXTRACT:%.+]]> = extract-from-end ir<[[LD]]>, ir<1>
; IF-EVL-NEXT: EMIT branch-on-cond ir<true>
; IF-EVL-NEXT: Successor(s): ir-bb<for.end>, scalar.ph
-
+; IF-EVL: Cost of 4 for VF vscale x 4: FIRST-ORDER-RECURRENCE-PHI ir<[[FOR_PHI]]> = phi ir<33>, ir<[[LD]]>
+; IF-EVL: Cost of 4 for VF vscale x 4: WIDEN-INTRINSIC vp<[[SPLICE]]> = call llvm.experimental.vp.splice(ir<[[FOR_PHI]]>, ir<[[LD]]>, ir<-1>, ir<true>, vp<[[PREV_EVL]]>, vp<[[EVL]]>)
entry:
br label %for.body
>From eaf19475e5509f30d434a8aa56b96961027c9281 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 4 Mar 2025 13:22:24 +0800
Subject: [PATCH 2/4] [VPlan] Move FOR splice cost into
VPInstruction::FirstOrderRecurrenceSplice
After #124093 we now support fixed-order recurrences with EVL tail folding by replacing VPInstruction::FirstOrderRecurrenceSplice with a VP splice intrinsic.
However, the costing for the splice is currently done in VPFirstOrderRecurrencePHIRecipe, so when we add the VP splice intrinsic we end up costing the splice twice.
This patch fixes that by moving the splice cost out of VPFirstOrderRecurrencePHIRecipe and into VPInstruction::FirstOrderRecurrenceSplice, so that the cost is not duplicated when that VPInstruction is replaced by the VP splice intrinsic.
We still have to keep the VF=1 checks in VPFirstOrderRecurrencePHIRecipe since the splice might end up dead and discarded, e.g. in the test @pr97452_scalable_vf1_for.
---
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 20 +++++++++++--------
...an-vp-intrinsics-fixed-order-recurrence.ll | 2 +-
2 files changed, 13 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index e9f50e88867b2..5ce4d2ae6ff53 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -743,6 +743,17 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
return Ctx.TTI.getArithmeticReductionCost(
Instruction::Or, cast<VectorType>(VecTy), std::nullopt, Ctx.CostKind);
}
+ case VPInstruction::FirstOrderRecurrenceSplice: {
+ assert(VF.isVector());
+ SmallVector<int> Mask(VF.getKnownMinValue());
+ std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1);
+ Type *VectorTy =
+ toVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
+
+ return Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Splice,
+ cast<VectorType>(VectorTy), Mask,
+ Ctx.CostKind, VF.getKnownMinValue() - 1);
+ }
default:
// TODO: Compute cost other VPInstructions once the legacy cost model has
// been retired.
@@ -3463,14 +3474,7 @@ VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF,
if (VF.isScalable() && VF.getKnownMinValue() == 1)
return InstructionCost::getInvalid();
- SmallVector<int> Mask(VF.getKnownMinValue());
- std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1);
- Type *VectorTy =
- toVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
-
- return Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Splice,
- cast<VectorType>(VectorTy), Mask, Ctx.CostKind,
- VF.getKnownMinValue() - 1);
+ return 0;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll
index eeb7a525f72d1..deeff38b1fe78 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll
@@ -51,7 +51,7 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: EMIT vp<[[RESUME_EXTRACT:%.+]]> = extract-from-end ir<[[LD]]>, ir<1>
; IF-EVL-NEXT: EMIT branch-on-cond ir<true>
; IF-EVL-NEXT: Successor(s): ir-bb<for.end>, scalar.ph
-; IF-EVL: Cost of 4 for VF vscale x 4: FIRST-ORDER-RECURRENCE-PHI ir<[[FOR_PHI]]> = phi ir<33>, ir<[[LD]]>
+; IF-EVL: Cost of 0 for VF vscale x 4: FIRST-ORDER-RECURRENCE-PHI ir<[[FOR_PHI]]> = phi ir<33>, ir<[[LD]]>
; IF-EVL: Cost of 4 for VF vscale x 4: WIDEN-INTRINSIC vp<[[SPLICE]]> = call llvm.experimental.vp.splice(ir<[[FOR_PHI]]>, ir<[[LD]]>, ir<-1>, ir<true>, vp<[[PREV_EVL]]>, vp<[[EVL]]>)
entry:
br label %for.body
>From 6e4fd8f1bfce56ddd38544b1641af8c85b4969fa Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Wed, 5 Mar 2025 18:41:59 +0800
Subject: [PATCH 3/4] Add assertion message
---
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 5ce4d2ae6ff53..07a67977c2189 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -744,7 +744,7 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
Instruction::Or, cast<VectorType>(VecTy), std::nullopt, Ctx.CostKind);
}
case VPInstruction::FirstOrderRecurrenceSplice: {
- assert(VF.isVector());
+ assert(VF.isVector() && "Scalar FirstOrderRecurrenceSplice?");
SmallVector<int> Mask(VF.getKnownMinValue());
std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1);
Type *VectorTy =
>From c5f5681903c0117a9e1e6027c1d1bfc0029c62c5 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 13 Mar 2025 20:10:25 +0800
Subject: [PATCH 4/4] Remove getVPSingleValue
---
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 6be8172292da2..55d70fbbc5e41 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -758,8 +758,7 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
assert(VF.isVector() && "Scalar FirstOrderRecurrenceSplice?");
SmallVector<int> Mask(VF.getKnownMinValue());
std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1);
- Type *VectorTy =
- toVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
+ Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
return Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Splice,
cast<VectorType>(VectorTy), Mask,
More information about the llvm-commits mailing list