[llvm] [RISCV][TTI] Implement instruction cost for vp.reduce.* (PR #114184)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 29 22:19:31 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Elvis Wang (ElvisWang123)
<details>
<summary>Changes</summary>
This patch based on #<!-- -->114180.
---
Patch is 624.29 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114184.diff
14 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (+74-1)
- (modified) llvm/test/Analysis/CostModel/RISCV/reduce-add.ll (+131-4)
- (modified) llvm/test/Analysis/CostModel/RISCV/reduce-and.ll (+140-4)
- (modified) llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll (+211-3)
- (modified) llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll (+178-4)
- (modified) llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll (+94-4)
- (modified) llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll (+211-3)
- (modified) llvm/test/Analysis/CostModel/RISCV/reduce-max.ll (+257-5)
- (modified) llvm/test/Analysis/CostModel/RISCV/reduce-min.ll (+256-4)
- (modified) llvm/test/Analysis/CostModel/RISCV/reduce-or.ll (+140-4)
- (modified) llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll (+207-4)
- (modified) llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll (+348-4)
- (modified) llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll (+131-4)
- (modified) llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll (+32-32)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 395baa5f1aab99..627eb9cc4bc5fe 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1179,6 +1179,79 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return getCmpSelInstrCost(Instruction::Select, ICA.getReturnType(),
ICA.getArgTypes()[0], CmpInst::BAD_ICMP_PREDICATE,
CostKind);
+ case Intrinsic::vp_reduce_add:
+ case Intrinsic::vp_reduce_fadd:
+ case Intrinsic::vp_reduce_mul:
+ case Intrinsic::vp_reduce_fmul:
+ case Intrinsic::vp_reduce_and:
+ case Intrinsic::vp_reduce_or:
+ case Intrinsic::vp_reduce_xor: {
+ unsigned Opcode;
+ switch (ICA.getID()) {
+ case Intrinsic::vp_reduce_add:
+ Opcode = Instruction::Add;
+ break;
+ case Intrinsic::vp_reduce_fadd:
+ Opcode = Instruction::FAdd;
+ break;
+ case Intrinsic::vp_reduce_mul:
+ Opcode = Instruction::Mul;
+ break;
+ case Intrinsic::vp_reduce_fmul:
+ Opcode = Instruction::FMul;
+ break;
+ case Intrinsic::vp_reduce_and:
+ Opcode = Instruction::And;
+ break;
+ case Intrinsic::vp_reduce_or:
+ Opcode = Instruction::Or;
+ break;
+ case Intrinsic::vp_reduce_xor:
+ Opcode = Instruction::Xor;
+ break;
+ }
+ return getArithmeticReductionCost(Opcode,
+ cast<VectorType>(ICA.getArgTypes()[1]),
+ ICA.getFlags(), CostKind);
+ }
+ case Intrinsic::vp_reduce_smax:
+ case Intrinsic::vp_reduce_smin:
+ case Intrinsic::vp_reduce_umax:
+ case Intrinsic::vp_reduce_umin:
+ case Intrinsic::vp_reduce_fmax:
+ case Intrinsic::vp_reduce_fmaximum:
+ case Intrinsic::vp_reduce_fmin:
+ case Intrinsic::vp_reduce_fminimum: {
+ unsigned IID;
+ switch (ICA.getID()) {
+ case Intrinsic::vp_reduce_smax:
+ IID = Intrinsic::smax;
+ break;
+ case Intrinsic::vp_reduce_smin:
+ IID = Intrinsic::smin;
+ break;
+ case Intrinsic::vp_reduce_umax:
+ IID = Intrinsic::umax;
+ break;
+ case Intrinsic::vp_reduce_umin:
+ IID = Intrinsic::umin;
+ break;
+ case Intrinsic::vp_reduce_fmax:
+ IID = Intrinsic::maxnum;
+ break;
+ case Intrinsic::vp_reduce_fmaximum:
+ IID = Intrinsic::maximum;
+ break;
+ case Intrinsic::vp_reduce_fmin:
+ IID = Intrinsic::minnum;
+ break;
+ case Intrinsic::vp_reduce_fminimum:
+ IID = Intrinsic::minimum;
+ break;
+ }
+ return getMinMaxReductionCost(IID, cast<VectorType>(ICA.getArgTypes()[1]),
+ ICA.getFlags(), CostKind);
+ }
}
if (ST->hasVInstructions() && RetTy->isVectorTy()) {
@@ -1552,7 +1625,7 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
}
// IR Reduction is composed by two vmv and one rvv reduction instruction.
- if (TTI::requiresOrderedReduction(FMF)) {
+ if (TTI::requiresOrderedReduction(FMF) && ElementTy->isFloatingPointTy()) {
Opcodes.push_back(RISCV::VFMV_S_F);
for (unsigned i = 0; i < LT.first.getValue(); i++)
Opcodes.push_back(RISCV::VFREDOSUM_VS);
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
index 6032ae01aa718b..70687da17eb1a5 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
@@ -1,8 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
define i32 @reduce_i1(i32 %arg) {
; CHECK-LABEL: 'reduce_i1'
@@ -14,6 +16,14 @@ define i32 @reduce_i1(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.add.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.add.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.add.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.add.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.add.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.add.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.add.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.add.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i1'
@@ -25,6 +35,14 @@ define i32 @reduce_i1(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.add.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.add.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.add.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.add.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.add.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.add.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.add.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.add.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)
@@ -35,6 +53,15 @@ define i32 @reduce_i1(i32 %arg) {
%V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
%V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
%V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
+
+ %V1_vp = call i1 @llvm.vp.reduce.add.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i1 @llvm.vp.reduce.add.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i1 @llvm.vp.reduce.add.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i1 @llvm.vp.reduce.add.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i1 @llvm.vp.reduce.add.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i1 @llvm.vp.reduce.add.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i1 @llvm.vp.reduce.add.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i1 @llvm.vp.reduce.add.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
@@ -48,6 +75,14 @@ define i32 @reduce_i8(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.add.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.add.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.add.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.add.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i8'
@@ -59,6 +94,14 @@ define i32 @reduce_i8(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.add.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.add.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.add.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.add.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
@@ -69,6 +112,15 @@ define i32 @reduce_i8(i32 %arg) {
%V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
%V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
%V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
+
+ %V1_vp = call i8 @llvm.vp.reduce.add.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i8 @llvm.vp.reduce.add.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i8 @llvm.vp.reduce.add.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i8 @llvm.vp.reduce.add.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
@@ -82,6 +134,14 @@ define i32 @reduce_i16(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.add.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.add.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.add.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.add.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.add.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.add.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.add.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.add.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i16'
@@ -93,6 +153,14 @@ define i32 @reduce_i16(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.add.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.add.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.add.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.add.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.add.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.add.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.add.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.add.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
@@ -103,6 +171,15 @@ define i32 @reduce_i16(i32 %arg) {
%V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
%V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
%V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
+
+ %V1_vp = call i16 @llvm.vp.reduce.add.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i16 @llvm.vp.reduce.add.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i16 @llvm.vp.reduce.add.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i16 @llvm.vp.reduce.add.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i16 @llvm.vp.reduce.add.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i16 @llvm.vp.reduce.add.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i16 @llvm.vp.reduce.add.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i16 @llvm.vp.reduce.add.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
@@ -116,6 +193,14 @@ define i32 @reduce_i32(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost o...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/114184
More information about the llvm-commits
mailing list