[llvm] [RISCV][TTI] Improve SiFive7 reduction cost (PR #90951)

Fri May 3 02:21:34 PDT 2024

llvmbot wrote:



@llvm/pr-subscribers-backend-risc-v

@llvm/pr-subscribers-llvm-analysis

Author: Shih-Po Hung (arcbbb)

<details>
<summary>Changes</summary>

The cost calculation for intrinsic llvm.vector.reduce.* factors in the costs of instructions VRED, VFRED, VMV, and VFMV.

This patch implements a target-specific cost function. If an opcode is supported in the target-specific cost function, it takes precedence over the default cost.

Co-authored-by: Elvis Wang <elvis.wang@sifive.com>

---

Patch is 77.52 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/90951.diff


2 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (+60) 
- (added) llvm/test/Analysis/CostModel/RISCV/sifive-x280-reduce.ll (+675) 


``````````diff

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index ce26e61880fd05..81fc3e81fd1d0f 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -34,6 +34,54 @@ static cl::opt<unsigned> SLPMaxVF(
         "exclusively by SLP vectorizer."),
     cl::Hidden);
 
+static std::optional<InstructionCost>
+getSiFiveX280RVVCost(unsigned Op, MVT VT, TTI::TargetCostKind CostKind) {
+  std::optional<InstructionCost> Cost;
+  unsigned VScale = 8;
+  switch (Op) {
+  default:
+    Cost = std::nullopt;
+    break;
+  case RISCV::VREDMAX_VS:
+  case RISCV::VREDMIN_VS:
+  case RISCV::VREDMAXU_VS:
+  case RISCV::VREDMINU_VS:
+  case RISCV::VREDSUM_VS:
+  case RISCV::VREDAND_VS:
+  case RISCV::VREDOR_VS:
+  case RISCV::VREDXOR_VS:
+  case RISCV::VFREDMAX_VS:
+  case RISCV::VFREDMIN_VS:
+  case RISCV::VFREDUSUM_VS: {
+    unsigned VL = VT.getVectorMinNumElements();
+    if (!VT.isFixedLengthVector())
+      VL *= VScale;
+    // For the cases with small VL, we use a lookup table for accurate
+    // cost estimation.
+    unsigned LookUpSiFive7ReduceLatency[] = {0, 20, 27, 32, 34, 38, 40, 41, 42};
+    if (VL <= 32) {
+      Cost = LookUpSiFive7ReduceLatency[(VL + 3) >> 2];
+      break;
+    }
+    Cost = 6 + 7 * Log2_32_Ceil(VL);
+    break;
+  }
+  case RISCV::VFREDOSUM_VS: {
+    unsigned VL = VT.getVectorMinNumElements();
+    if (!VT.isFixedLengthVector())
+      VL *= VScale;
+    Cost = VL * 6;
+    break;
+  }
+  case RISCV::VMV_X_S:
+  case RISCV::VFMV_F_S:
+  case RISCV::VCPOP_M:
+    /* Vector-to-scalar communication */
+    Cost = 8;
+  }
+  return Cost;
+}
+
 InstructionCost
 RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
                                       TTI::TargetCostKind CostKind) {
@@ -43,11 +91,23 @@ RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
   size_t NumInstr = OpCodes.size();
   if (CostKind == TTI::TCK_CodeSize)
     return NumInstr;
+
+  std::optional<InstructionCost> (*GetTargetCost)(
+      unsigned, MVT, TTI::TargetCostKind) = nullptr;
+  if (ST->getProcFamily() == RISCVSubtarget::SiFive7)
+    GetTargetCost = getSiFiveX280RVVCost;
   InstructionCost LMULCost = TLI->getLMULCost(VT);
   if ((CostKind != TTI::TCK_RecipThroughput) && (CostKind != TTI::TCK_Latency))
     return LMULCost * NumInstr;
   InstructionCost Cost = 0;
   for (auto Op : OpCodes) {
+    std::optional<InstructionCost> OverrideCost =
+        GetTargetCost ? GetTargetCost(Op, VT, CostKind) : std::nullopt;
+    if (OverrideCost) {
+      Cost += *OverrideCost;
+      continue;
+    }
+
     switch (Op) {
     case RISCV::VRGATHER_VI:
       Cost += TLI->getVRGatherVICost(VT);
diff --git a/llvm/test/Analysis/CostModel/RISCV/sifive-x280-reduce.ll b/llvm/test/Analysis/CostModel/RISCV/sifive-x280-reduce.ll
new file mode 100644
index 00000000000000..01632f9ab80643
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/RISCV/sifive-x280-reduce.ll
@@ -0,0 +1,675 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv64 -mcpu=sifive-x280 -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=THROUGHPUT
+; RUN: opt < %s -mtriple=riscv64 -mcpu=sifive-x280 -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+
+declare i1 @llvm.vector.reduce.add.v16i1(<16 x i1>)
+declare i1 @llvm.vector.reduce.add.v32i1(<32 x i1>)
+declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.add.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.add.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.add.v256i8(<256 x i8>)
+declare i8 @llvm.vector.reduce.add.v512i8(<512 x i8>)
+declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.add.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.add.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.add.v128i16(<128 x i16>)
+declare i16 @llvm.vector.reduce.add.v256i16(<256 x i16>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.add.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.add.v64i32(<64 x i32>)
+declare i32 @llvm.vector.reduce.add.v128i32(<128 x i32>)
+declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.add.v32i64(<32 x i64>)
+declare i64 @llvm.vector.reduce.add.v64i64(<64 x i64>)
+
+define void @reduce_add() {
+; THROUGHPUT-LABEL: 'reduce_add'
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %1 = call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %2 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %3 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %4 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %5 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %6 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %7 = call i8 @llvm.vector.reduce.add.v256i8(<256 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %8 = call i8 @llvm.vector.reduce.add.v512i8(<512 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %9 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %10 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %11 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %12 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %13 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %14 = call i16 @llvm.vector.reduce.add.v256i16(<256 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %15 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %16 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %17 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %18 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %19 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %20 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %21 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %22 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %23 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %24 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %25 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %26 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %27 = call i1 @llvm.vector.reduce.add.nxv1i1(<vscale x 1 x i1> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %28 = call i1 @llvm.vector.reduce.add.nxv2i1(<vscale x 2 x i1> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %29 = call i1 @llvm.vector.reduce.add.nxv4i1(<vscale x 4 x i1> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %30 = call i1 @llvm.vector.reduce.add.nxv8i1(<vscale x 8 x i1> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %31 = call i1 @llvm.vector.reduce.add.nxv16i1(<vscale x 16 x i1> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %32 = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %33 = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %34 = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %35 = call i8 @llvm.vector.reduce.add.nxv8i8(<vscale x 8 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %36 = call i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %37 = call i8 @llvm.vector.reduce.add.nxv32i8(<vscale x 32 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %38 = call i8 @llvm.vector.reduce.add.nxv64i8(<vscale x 64 x i8> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %39 = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %40 = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %41 = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %42 = call i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %43 = call i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %44 = call i16 @llvm.vector.reduce.add.nxv32i16(<vscale x 32 x i16> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %45 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %46 = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %47 = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %48 = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %49 = call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %50 = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %51 = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %52 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %53 = call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> undef)
+; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'reduce_add'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %5 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %7 = call i8 @llvm.vector.reduce.add.v256i8(<256 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %8 = call i8 @llvm.vector.reduce.add.v512i8(<512 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %9 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %10 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %11 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %12 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %13 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %14 = call i16 @llvm.vector.reduce.add.v256i16(<256 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %15 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %16 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %17 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %18 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %19 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %20 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %21 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %22 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %23 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %24 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %25 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %26 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %27 = call i1 @llvm.vector.reduce.add.nxv1i1(<vscale x 1 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = call i1 @llvm.vector.reduce.add.nxv2i1(<vscale x 2 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %29 = call i1 @llvm.vector.reduce.add.nxv4i1(<vscale x 4 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %30 = call i1 @llvm.vector.reduce.add.nxv8i1(<vscale x 8 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %31 = call i1 @llvm.vector.reduce.add.nxv16i1(<vscale x 16 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %32 = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %33 = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %34 = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %35 = call i8 @llvm.vector.reduce.add.nxv8i8(<vscale x 8 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %36 = call i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %37 = call i8 @llvm.vector.reduce.add.nxv32i8(<vscale x 32 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %38 = call i8 @llvm.vector.reduce.add.nxv64i8(<vscale x 64 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %39 = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %40 = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %41 = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %42 = call i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %43 = call i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %44 = call i16 @llvm.vector.reduce.add.nxv32i16(<vscale x 32 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %45 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %46 = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %47 = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %48 = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %49 = call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %50 = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %51 = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %52 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %53 = call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  call i1 @llvm.vector.reduce.add.v1...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/90951