[llvm] [VPlan] Compute cost for binary op VPInstruction with underlying values. (PR #125434)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 2 13:20:04 PST 2025
https://github.com/fhahn created https://github.com/llvm/llvm-project/pull/125434
None
>From b2adb14bac54b9cc294180938d75b478eeb38c2d Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 2 Feb 2025 21:15:46 +0000
Subject: [PATCH 1/2] [LV] Add test checking costs of some VPInstructions.
---
.../X86/CostModel/vpinstruction-cost.ll | 74 +++++++++++++++++++
1 file changed, 74 insertions(+)
create mode 100644 llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll
new file mode 100644
index 00000000000000..25737bbb0096a0
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of"
+; RUN: opt -S -passes=loop-vectorize -mcpu=skylake-avx512 -mtriple=x86_64-apple-macosx -debug -disable-output -S %s 2>&1 | FileCheck %s
+
+; REQUIRES: asserts
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+
+; The loop below contains an `or disjoint` (%or) in the conditionally executed
+; block %loop.then. In the printed VPlan it shows up as an `add` VPInstruction
+; (EMIT ir<%or> = add ir<%iv.4>, ir<1>); the assertions below record the cost
+; the loop vectorizer reports for it and for the other recipes at each VF.
+define void @wide_or_replaced_with_add_vpinstruction(ptr %src, ptr noalias %dst) {
+; CHECK-LABEL: 'wide_or_replaced_with_add_vpinstruction'
+; CHECK: Cost of 1 for VF 2: induction instruction %iv.next = add nuw nsw i64 %iv, 1
+; CHECK: Cost of 0 for VF 2: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+; CHECK: Cost of 1 for VF 2: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32
+; CHECK: Cost of 0 for VF 2: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
+; CHECK: Cost of 0 for VF 2: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0>
+; CHECK: Cost of 0 for VF 2: vp<%4> = SCALAR-STEPS vp<%3>, ir<1>
+; CHECK: Cost of 0 for VF 2: CLONE ir<%g.src> = getelementptr inbounds ir<%src>, vp<%4>
+; CHECK: Cost of 0 for VF 2: vp<%5> = vector-pointer ir<%g.src>
+; CHECK: Cost of 1 for VF 2: WIDEN ir<%l> = load vp<%5>
+; CHECK: Cost of 1 for VF 2: WIDEN ir<%iv.4> = add ir<%iv>, ir<4>
+; CHECK: Cost of 1 for VF 2: WIDEN ir<%c> = icmp ule ir<%l>, ir<128>
+; CHECK: Cost of 0 for VF 2: EMIT ir<%or> = add ir<%iv.4>, ir<1>
+; CHECK: Cost of 0 for VF 2: CLONE ir<%g.dst> = getelementptr ir<%dst>, ir<%or>
+; CHECK: Cost of 0 for VF 2: vp<%6> = vector-pointer ir<%g.dst>
+; CHECK: Cost of 1 for VF 2: WIDEN store vp<%6>, ir<%iv.4>, ir<%c>
+; CHECK: Cost of 0 for VF 2: EMIT vp<%index.next> = add nuw vp<%3>, vp<%1>
+; CHECK: Cost of 0 for VF 2: EMIT branch-on-count vp<%index.next>, vp<%2>
+; CHECK: Cost of 0 for VF 2: vector loop backedge
+; CHECK: Cost of 1 for VF 4: induction instruction %iv.next = add nuw nsw i64 %iv, 1
+; CHECK: Cost of 0 for VF 4: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+; CHECK: Cost of 1 for VF 4: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32
+; CHECK: Cost of 0 for VF 4: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
+; CHECK: Cost of 0 for VF 4: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0>
+; CHECK: Cost of 0 for VF 4: vp<%4> = SCALAR-STEPS vp<%3>, ir<1>
+; CHECK: Cost of 0 for VF 4: CLONE ir<%g.src> = getelementptr inbounds ir<%src>, vp<%4>
+; CHECK: Cost of 0 for VF 4: vp<%5> = vector-pointer ir<%g.src>
+; CHECK: Cost of 1 for VF 4: WIDEN ir<%l> = load vp<%5>
+; CHECK: Cost of 1 for VF 4: WIDEN ir<%iv.4> = add ir<%iv>, ir<4>
+; CHECK: Cost of 1 for VF 4: WIDEN ir<%c> = icmp ule ir<%l>, ir<128>
+; CHECK: Cost of 0 for VF 4: EMIT ir<%or> = add ir<%iv.4>, ir<1>
+; CHECK: Cost of 0 for VF 4: CLONE ir<%g.dst> = getelementptr ir<%dst>, ir<%or>
+; CHECK: Cost of 0 for VF 4: vp<%6> = vector-pointer ir<%g.dst>
+; CHECK: Cost of 1 for VF 4: WIDEN store vp<%6>, ir<%iv.4>, ir<%c>
+; CHECK: Cost of 0 for VF 4: EMIT vp<%index.next> = add nuw vp<%3>, vp<%1>
+; CHECK: Cost of 0 for VF 4: EMIT branch-on-count vp<%index.next>, vp<%2>
+; CHECK: Cost of 0 for VF 4: vector loop backedge
+; CHECK: Cost of 1 for VF 4: induction instruction %iv.next = add nuw nsw i64 %iv, 1
+; CHECK: Cost of 0 for VF 4: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+; CHECK: Cost of 1 for VF 4: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32
+;
+entry:
+ br label %loop.header
+
+; Loads %src[%iv] and branches to %loop.then based on the compare of the
+; loaded value against 128 (%c); otherwise goes straight to the latch.
+loop.header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %g.src = getelementptr inbounds i64, ptr %src, i64 %iv
+ %l = load i64, ptr %g.src
+ %iv.4 = add nuw nsw i64 %iv, 4
+ %c = icmp ule i64 %l, 128
+ br i1 %c, label %loop.then, label %loop.latch
+
+; Conditional block: %or is used as the index of the store into %dst.
+loop.then:
+ %or = or disjoint i64 %iv.4, 1
+ %g.dst = getelementptr inbounds i64, ptr %dst, i64 %or
+ store i64 %iv.4, ptr %g.dst, align 4
+ br label %loop.latch
+
+; The loop exits once %iv.next reaches 32.
+loop.latch:
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 32
+ br i1 %exitcond, label %exit, label %loop.header
+
+exit:
+ ret void
+}
>From 54439f432230be5181bea5bb2626423b04f28c09 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 2 Feb 2025 21:17:51 +0000
Subject: [PATCH 2/2] [VPlan] Compute cost for binary op VPInstruction with
underlying values.
---
llvm/lib/Transforms/Vectorize/VPlan.h | 5 +----
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 19 +++++++++++++++++++
.../X86/CostModel/vpinstruction-cost.ll | 4 ++--
3 files changed, 22 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index fac207287e0bcc..5b9dcf68a62bec 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -972,10 +972,7 @@ class VPInstruction : public VPRecipeWithIRFlags,
/// Return the cost of this VPInstruction.
InstructionCost computeCost(ElementCount VF,
- VPCostContext &Ctx) const override {
- // TODO: Compute accurate cost after retiring the legacy cost model.
- return 0;
- }
+ VPCostContext &Ctx) const override;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the VPInstruction to \p O.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index c84a93d7398f73..ac70f2498dbaeb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -768,6 +768,25 @@ void VPInstruction::execute(VPTransformState &State) {
/*IsScalar*/ GeneratesPerFirstLaneOnly);
}
+/// Return the cost of this VPInstruction for vectorization factor \p VF.
+///
+/// Only binary operations that have an underlying IR value are costed, by
+/// querying TTI's arithmetic instruction cost with \p Ctx.CostKind. Every
+/// other VPInstruction is currently reported with a cost of 0.
+InstructionCost VPInstruction::computeCost(ElementCount VF,
+                                           VPCostContext &Ctx) const {
+  if (Instruction::isBinaryOp(getOpcode())) {
+    // TODO: Compute accurate cost for binary ops without an underlying value
+    // after retiring the legacy cost model.
+    if (!getUnderlyingValue())
+      return 0;
+
+    assert(!doesGeneratePerAllLanes() &&
+           "Should only generate a vector value or single scalar, not scalars "
+           "for all lanes.");
+    // Cost the scalar result type when only the first lane is used; otherwise
+    // cost the result type widened by VF.
+    Type *ResTy = Ctx.Types.inferScalarType(this);
+    if (!vputils::onlyFirstLaneUsed(this))
+      ResTy = toVectorTy(ResTy, VF);
+
+    return Ctx.TTI.getArithmeticInstrCost(getOpcode(), ResTy, Ctx.CostKind);
+  }
+
+  // Non-binary VPInstructions are still costed as 0. They are not expected to
+  // carry an underlying value, which is what the assertion verifies.
+  assert(!getUnderlyingValue() &&
+         "unexpected VPInstruction with underlying value");
+  return 0;
+}
+
bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
if (Instruction::isBinaryOp(getOpcode()))
return false;
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll
index 25737bbb0096a0..bb85b88f181f78 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll
@@ -18,7 +18,7 @@ define void @wide_or_replaced_with_add_vpinstruction(ptr %src, ptr noalias %dst)
; CHECK: Cost of 1 for VF 2: WIDEN ir<%l> = load vp<%5>
; CHECK: Cost of 1 for VF 2: WIDEN ir<%iv.4> = add ir<%iv>, ir<4>
; CHECK: Cost of 1 for VF 2: WIDEN ir<%c> = icmp ule ir<%l>, ir<128>
-; CHECK: Cost of 0 for VF 2: EMIT ir<%or> = add ir<%iv.4>, ir<1>
+; CHECK: Cost of 1 for VF 2: EMIT ir<%or> = add ir<%iv.4>, ir<1>
; CHECK: Cost of 0 for VF 2: CLONE ir<%g.dst> = getelementptr ir<%dst>, ir<%or>
; CHECK: Cost of 0 for VF 2: vp<%6> = vector-pointer ir<%g.dst>
; CHECK: Cost of 1 for VF 2: WIDEN store vp<%6>, ir<%iv.4>, ir<%c>
@@ -36,7 +36,7 @@ define void @wide_or_replaced_with_add_vpinstruction(ptr %src, ptr noalias %dst)
; CHECK: Cost of 1 for VF 4: WIDEN ir<%l> = load vp<%5>
; CHECK: Cost of 1 for VF 4: WIDEN ir<%iv.4> = add ir<%iv>, ir<4>
; CHECK: Cost of 1 for VF 4: WIDEN ir<%c> = icmp ule ir<%l>, ir<128>
-; CHECK: Cost of 0 for VF 4: EMIT ir<%or> = add ir<%iv.4>, ir<1>
+; CHECK: Cost of 1 for VF 4: EMIT ir<%or> = add ir<%iv.4>, ir<1>
; CHECK: Cost of 0 for VF 4: CLONE ir<%g.dst> = getelementptr ir<%dst>, ir<%or>
; CHECK: Cost of 0 for VF 4: vp<%6> = vector-pointer ir<%g.dst>
; CHECK: Cost of 1 for VF 4: WIDEN store vp<%6>, ir<%iv.4>, ir<%c>
More information about the llvm-commits
mailing list