[llvm] [VPlan] Compute cost for binary op VPInstruction with underlying values. (PR #125434)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 6 11:47:44 PST 2025


https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/125434

>From 24e08a345e15d31eae88c28b1a9ddcdc77d6a100 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 2 Feb 2025 21:15:46 +0000
Subject: [PATCH 1/4] [LV] Add test checking costs of some VPInstructions.

---
 .../X86/CostModel/vpinstruction-cost.ll       | 74 +++++++++++++++++++
 1 file changed, 74 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll

diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll
new file mode 100644
index 000000000000000..25737bbb0096a00
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of"
+; RUN: opt -S -passes=loop-vectorize -mcpu=skylake-avx512 -mtriple=x86_64-apple-macosx -debug -disable-output -S %s 2>&1 | FileCheck %s
+
+; REQUIRES: asserts
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+
+define void @wide_or_replaced_with_add_vpinstruction(ptr %src, ptr noalias %dst) {
+; CHECK-LABEL: 'wide_or_replaced_with_add_vpinstruction'
+; CHECK:  Cost of 1 for VF 2: induction instruction %iv.next = add nuw nsw i64 %iv, 1
+; CHECK:  Cost of 0 for VF 2: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+; CHECK:  Cost of 1 for VF 2: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32
+; CHECK:  Cost of 0 for VF 2: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
+; CHECK:  Cost of 0 for VF 2: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0>
+; CHECK:  Cost of 0 for VF 2: vp<%4> = SCALAR-STEPS vp<%3>, ir<1>
+; CHECK:  Cost of 0 for VF 2: CLONE ir<%g.src> = getelementptr inbounds ir<%src>, vp<%4>
+; CHECK:  Cost of 0 for VF 2: vp<%5> = vector-pointer ir<%g.src>
+; CHECK:  Cost of 1 for VF 2: WIDEN ir<%l> = load vp<%5>
+; CHECK:  Cost of 1 for VF 2: WIDEN ir<%iv.4> = add ir<%iv>, ir<4>
+; CHECK:  Cost of 1 for VF 2: WIDEN ir<%c> = icmp ule ir<%l>, ir<128>
+; CHECK:  Cost of 0 for VF 2: EMIT ir<%or> = add ir<%iv.4>, ir<1>
+; CHECK:  Cost of 0 for VF 2: CLONE ir<%g.dst> = getelementptr ir<%dst>, ir<%or>
+; CHECK:  Cost of 0 for VF 2: vp<%6> = vector-pointer ir<%g.dst>
+; CHECK:  Cost of 1 for VF 2: WIDEN store vp<%6>, ir<%iv.4>, ir<%c>
+; CHECK:  Cost of 0 for VF 2: EMIT vp<%index.next> = add nuw vp<%3>, vp<%1>
+; CHECK:  Cost of 0 for VF 2: EMIT branch-on-count vp<%index.next>, vp<%2>
+; CHECK:  Cost of 0 for VF 2: vector loop backedge
+; CHECK:  Cost of 1 for VF 4: induction instruction %iv.next = add nuw nsw i64 %iv, 1
+; CHECK:  Cost of 0 for VF 4: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+; CHECK:  Cost of 1 for VF 4: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32
+; CHECK:  Cost of 0 for VF 4: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
+; CHECK:  Cost of 0 for VF 4: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0>
+; CHECK:  Cost of 0 for VF 4: vp<%4> = SCALAR-STEPS vp<%3>, ir<1>
+; CHECK:  Cost of 0 for VF 4: CLONE ir<%g.src> = getelementptr inbounds ir<%src>, vp<%4>
+; CHECK:  Cost of 0 for VF 4: vp<%5> = vector-pointer ir<%g.src>
+; CHECK:  Cost of 1 for VF 4: WIDEN ir<%l> = load vp<%5>
+; CHECK:  Cost of 1 for VF 4: WIDEN ir<%iv.4> = add ir<%iv>, ir<4>
+; CHECK:  Cost of 1 for VF 4: WIDEN ir<%c> = icmp ule ir<%l>, ir<128>
+; CHECK:  Cost of 0 for VF 4: EMIT ir<%or> = add ir<%iv.4>, ir<1>
+; CHECK:  Cost of 0 for VF 4: CLONE ir<%g.dst> = getelementptr ir<%dst>, ir<%or>
+; CHECK:  Cost of 0 for VF 4: vp<%6> = vector-pointer ir<%g.dst>
+; CHECK:  Cost of 1 for VF 4: WIDEN store vp<%6>, ir<%iv.4>, ir<%c>
+; CHECK:  Cost of 0 for VF 4: EMIT vp<%index.next> = add nuw vp<%3>, vp<%1>
+; CHECK:  Cost of 0 for VF 4: EMIT branch-on-count vp<%index.next>, vp<%2>
+; CHECK:  Cost of 0 for VF 4: vector loop backedge
+; CHECK:  Cost of 1 for VF 4: induction instruction %iv.next = add nuw nsw i64 %iv, 1
+; CHECK:  Cost of 0 for VF 4: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+; CHECK:  Cost of 1 for VF 4: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %g.src = getelementptr inbounds i64, ptr %src, i64 %iv
+  %l = load i64, ptr %g.src
+  %iv.4 = add nuw nsw i64 %iv, 4
+  %c = icmp ule i64 %l, 128
+  br i1 %c, label %loop.then, label %loop.latch
+
+loop.then:
+  %or = or disjoint i64 %iv.4, 1
+  %g.dst = getelementptr inbounds i64, ptr %dst, i64 %or
+  store i64 %iv.4, ptr %g.dst, align 4
+  br label %loop.latch
+
+loop.latch:
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 32
+  br i1 %exitcond, label %exit, label %loop.header
+
+exit:
+  ret void
+}

>From fddabf6128721affb2b3b89a5ac15897b4dc2f66 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 2 Feb 2025 21:17:51 +0000
Subject: [PATCH 2/4] [VPlan] Compute cost for binary op VPInstruction with
 underlying values.

---
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 21 +++++++++++++++++++
 .../X86/CostModel/vpinstruction-cost.ll       |  4 ++--
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index b734ddfce788e21..f7f547075e1e084 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -782,6 +782,27 @@ void VPInstruction::execute(VPTransformState &State) {
             /*IsScalar*/ GeneratesPerFirstLaneOnly);
 }
 
+InstructionCost VPInstruction::computeCost(ElementCount VF,
+                                           VPCostContext &Ctx) const {
+  if (Instruction::isBinaryOp(getOpcode())) {
+    if (!getUnderlyingValue())
+      return 0;
+
+    assert(!doesGeneratePerAllLanes() &&
+           "Should only generate a vector value or single scalar, not scalars "
+           "for all lanes.");
+    Type *ResTy = Ctx.Types.inferScalarType(this);
+    if (!vputils::onlyFirstLaneUsed(this))
+      ResTy = toVectorTy(ResTy, VF);
+
+    return Ctx.TTI.getArithmeticInstrCost(getOpcode(), ResTy, Ctx.CostKind);
+  }
+
+  assert(!getUnderlyingValue() &&
+         "unexpected VPInstruction without underlying value");
+  return 0;
+}
+
 bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
   if (Instruction::isBinaryOp(getOpcode()))
     return false;
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll
index 25737bbb0096a00..bb85b88f181f784 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll
@@ -18,7 +18,7 @@ define void @wide_or_replaced_with_add_vpinstruction(ptr %src, ptr noalias %dst)
 ; CHECK:  Cost of 1 for VF 2: WIDEN ir<%l> = load vp<%5>
 ; CHECK:  Cost of 1 for VF 2: WIDEN ir<%iv.4> = add ir<%iv>, ir<4>
 ; CHECK:  Cost of 1 for VF 2: WIDEN ir<%c> = icmp ule ir<%l>, ir<128>
-; CHECK:  Cost of 0 for VF 2: EMIT ir<%or> = add ir<%iv.4>, ir<1>
+; CHECK:  Cost of 1 for VF 2: EMIT ir<%or> = add ir<%iv.4>, ir<1>
 ; CHECK:  Cost of 0 for VF 2: CLONE ir<%g.dst> = getelementptr ir<%dst>, ir<%or>
 ; CHECK:  Cost of 0 for VF 2: vp<%6> = vector-pointer ir<%g.dst>
 ; CHECK:  Cost of 1 for VF 2: WIDEN store vp<%6>, ir<%iv.4>, ir<%c>
@@ -36,7 +36,7 @@ define void @wide_or_replaced_with_add_vpinstruction(ptr %src, ptr noalias %dst)
 ; CHECK:  Cost of 1 for VF 4: WIDEN ir<%l> = load vp<%5>
 ; CHECK:  Cost of 1 for VF 4: WIDEN ir<%iv.4> = add ir<%iv>, ir<4>
 ; CHECK:  Cost of 1 for VF 4: WIDEN ir<%c> = icmp ule ir<%l>, ir<128>
-; CHECK:  Cost of 0 for VF 4: EMIT ir<%or> = add ir<%iv.4>, ir<1>
+; CHECK:  Cost of 1 for VF 4: EMIT ir<%or> = add ir<%iv.4>, ir<1>
 ; CHECK:  Cost of 0 for VF 4: CLONE ir<%g.dst> = getelementptr ir<%dst>, ir<%or>
 ; CHECK:  Cost of 0 for VF 4: vp<%6> = vector-pointer ir<%g.dst>
 ; CHECK:  Cost of 1 for VF 4: WIDEN store vp<%6>, ir<%iv.4>, ir<%c>

>From 9a955b1baa9e2fa3fb677f26c00f6fde6bccc6a0 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 4 Feb 2025 21:52:36 +0000
Subject: [PATCH 3/4] !fixup address latest comments, thanks!

---
 llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index f7f547075e1e084..26145000b5ae225 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -785,8 +785,11 @@ void VPInstruction::execute(VPTransformState &State) {
 InstructionCost VPInstruction::computeCost(ElementCount VF,
                                            VPCostContext &Ctx) const {
   if (Instruction::isBinaryOp(getOpcode())) {
-    if (!getUnderlyingValue())
+    if (!getUnderlyingValue()) {
+      // TODO: Compute cost for VPInstructions without underlying values once
+      // the legacy cost model has been retired.
       return 0;
+    }
 
     assert(!doesGeneratePerAllLanes() &&
            "Should only generate a vector value or single scalar, not scalars "
@@ -798,8 +801,10 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
     return Ctx.TTI.getArithmeticInstrCost(getOpcode(), ResTy, Ctx.CostKind);
   }
 
+  // TODO: Compute cost other VPInstructions once the legacy cost model has
+  // been retired.
   assert(!getUnderlyingValue() &&
-         "unexpected VPInstruction without underlying value");
+         "unexpected VPInstruction witht underlying value");
   return 0;
 }
 

>From e37c2ad55ab9f5821f087bb618f9659014e1d56a Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 6 Feb 2025 19:43:49 +0000
Subject: [PATCH 4/4] !fixup update after rebase.

---
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 48 ++++++++-----------
 1 file changed, 21 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 26145000b5ae225..eb1aac452375291 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -713,6 +713,23 @@ Value *VPInstruction::generate(VPTransformState &State) {
 
 InstructionCost VPInstruction::computeCost(ElementCount VF,
                                            VPCostContext &Ctx) const {
+  if (Instruction::isBinaryOp(getOpcode())) {
+    if (!getUnderlyingValue()) {
+      // TODO: Compute cost for VPInstructions without underlying values once
+      // the legacy cost model has been retired.
+      return 0;
+    }
+
+    assert(!doesGeneratePerAllLanes() &&
+           "Should only generate a vector value or single scalar, not scalars "
+           "for all lanes.");
+    Type *ResTy = Ctx.Types.inferScalarType(this);
+    if (!vputils::onlyFirstLaneUsed(this))
+      ResTy = toVectorTy(ResTy, VF);
+
+    return Ctx.TTI.getArithmeticInstrCost(getOpcode(), ResTy, Ctx.CostKind);
+  }
+
   switch (getOpcode()) {
   case VPInstruction::AnyOf: {
     auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
@@ -720,7 +737,10 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
         Instruction::Or, cast<VectorType>(VecTy), std::nullopt, Ctx.CostKind);
   }
   default:
-    // TODO: Fill out other opcodes!
+    // TODO: Compute cost for other VPInstructions once the legacy cost model
+    // has been retired.
+    assert(!getUnderlyingValue() &&
+           "unexpected VPInstruction with underlying value");
     return 0;
   }
 }
@@ -782,32 +802,6 @@ void VPInstruction::execute(VPTransformState &State) {
             /*IsScalar*/ GeneratesPerFirstLaneOnly);
 }
 
-InstructionCost VPInstruction::computeCost(ElementCount VF,
-                                           VPCostContext &Ctx) const {
-  if (Instruction::isBinaryOp(getOpcode())) {
-    if (!getUnderlyingValue()) {
-      // TODO: Compute cost for VPInstructions without underlying values once
-      // the legacy cost model has been retired.
-      return 0;
-    }
-
-    assert(!doesGeneratePerAllLanes() &&
-           "Should only generate a vector value or single scalar, not scalars "
-           "for all lanes.");
-    Type *ResTy = Ctx.Types.inferScalarType(this);
-    if (!vputils::onlyFirstLaneUsed(this))
-      ResTy = toVectorTy(ResTy, VF);
-
-    return Ctx.TTI.getArithmeticInstrCost(getOpcode(), ResTy, Ctx.CostKind);
-  }
-
-  // TODO: Compute cost other VPInstructions once the legacy cost model has
-  // been retired.
-  assert(!getUnderlyingValue() &&
-         "unexpected VPInstruction witht underlying value");
-  return 0;
-}
-
 bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
   if (Instruction::isBinaryOp(getOpcode()))
     return false;



More information about the llvm-commits mailing list