[llvm] [RISCV][CostModel] Updates reduction and shuffle cost (PR #77342)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 8 09:01:31 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-analysis
@llvm/pr-subscribers-backend-risc-v
Author: Shih-Po Hung (arcbbb)
<details>
<summary>Changes</summary>
- Make VMV_S_* and VMV_*_S cost independent of LMUL
- Use getRISCVInstructionCost() in reduction cost. Add SplitCost for LMUL larger than 8, e.g. the cost of vredsum on [vscale x 16 x i64] will be the cost of vadd on [vscale x 8 x i64] plus the cost of vredsum on [vscale x 8 x i64].
---
Patch is 344.56 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/77342.diff
17 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (+107-38)
- (modified) llvm/test/Analysis/CostModel/RISCV/reduce-add.ll (+51-51)
- (modified) llvm/test/Analysis/CostModel/RISCV/reduce-and.ll (+35-35)
- (modified) llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll (+54-54)
- (modified) llvm/test/Analysis/CostModel/RISCV/reduce-max.ll (+70-70)
- (modified) llvm/test/Analysis/CostModel/RISCV/reduce-min.ll (+70-70)
- (modified) llvm/test/Analysis/CostModel/RISCV/reduce-or.ll (+57-57)
- (modified) llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll (+73-73)
- (modified) llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll (+114-114)
- (modified) llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll (+51-51)
- (modified) llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll (+4-4)
- (modified) llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll (+14-14)
- (modified) llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll (+6-6)
- (modified) llvm/test/Analysis/CostModel/RISCV/shuffle-insert_subvector.ll (+38-38)
- (modified) llvm/test/Analysis/CostModel/RISCV/shuffle-reverse.ll (+6-6)
- (modified) llvm/test/Analysis/CostModel/RISCV/shuffle-select.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll (+10-10)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index b3916c98700519..6c143a762b0c03 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -46,6 +46,9 @@ RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
InstructionCost Cost = 0;
for (auto Op : OpCodes) {
switch (Op) {
+ case RISCV::SLT:
+ Cost += 1;
+ break;
case RISCV::VRGATHER_VI:
Cost += TLI->getVRGatherVICost(VT);
break;
@@ -84,8 +87,14 @@ RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
Cost += VL;
break;
}
+ case RISCV::VMV_X_S:
+ case RISCV::VFMV_F_S:
+ Cost += 1;
+ break;
case RISCV::VMV_S_X:
- // FIXME: VMV_S_X doesn't use LMUL, the cost should be 1
+ case RISCV::VFMV_S_F:
+ Cost += 1;
+ break;
default:
Cost += LMULCost;
}
@@ -444,9 +453,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// vmv.s.x v0, a0
// vmerge.vvm v8, v9, v8, v0
return LT.first *
- (TLI->getLMULCost(LT.second) + // FIXME: should be 1 for li
- getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
- LT.second, CostKind));
+ (1 + getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
+ LT.second, CostKind));
}
case TTI::SK_Broadcast: {
bool HasScalar = (Args.size() > 0) && (Operator::getOpcode(Args[0]) ==
@@ -459,9 +467,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// vmv.v.x v8, a0
// vmsne.vi v0, v8, 0
return LT.first *
- (TLI->getLMULCost(LT.second) + // FIXME: should be 1 for andi
- getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
- LT.second, CostKind));
+ (1 + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
+ LT.second, CostKind));
}
// Example sequence:
// vsetivli zero, 2, e8, mf8, ta, mu (ignored)
@@ -473,12 +480,10 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// vmsne.vi v0, v8, 0
return LT.first *
- (TLI->getLMULCost(LT.second) + // FIXME: this should be 1 for andi
- TLI->getLMULCost(
- LT.second) + // FIXME: vmv.x.s is the same as extractelement
- getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
- RISCV::VMV_V_X, RISCV::VMSNE_VI},
- LT.second, CostKind));
+ (1 + getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
+ RISCV::VMV_X_S, RISCV::VMV_V_X,
+ RISCV::VMSNE_VI},
+ LT.second, CostKind));
}
if (HasScalar) {
@@ -523,9 +528,9 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
if (LT.second.isFixedLengthVector())
// vrsub.vi has a 5 bit immediate field, otherwise an li suffices
LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
- // FIXME: replace the constant `2` below with cost of {VID_V,VRSUB_VX}
- InstructionCost GatherCost =
- 2 + getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, CostKind);
+ InstructionCost GatherCost = getRISCVInstructionCost(
+ {RISCV::VID_V, RISCV::VRSUB_VX, RISCV::VRGATHER_VV}, LT.second,
+ CostKind);
// Mask operation additionally required extend and truncate
InstructionCost ExtendCost = Tp->getElementType()->isIntegerTy(1) ? 3 : 0;
return LT.first * (LenCost + GatherCost + ExtendCost);
@@ -1358,19 +1363,53 @@ RISCVTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind);
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
- if (Ty->getElementType()->isIntegerTy(1))
- // vcpop sequences, see vreduction-mask.ll. umax, smin actually only
- // cost 2, but we don't have enough info here so we slightly over cost.
- return (LT.first - 1) + 3;
+ std::array<unsigned, 3> Opcodes;
+ if (Ty->getElementType()->isIntegerTy(1)) {
+ // vcpop sequences, see vreduction-mask.ll.
+ if ((IID == Intrinsic::umax) || (IID == Intrinsic::smin))
+ Opcodes = {RISCV::VMNAND_MM, RISCV::VCPOP_M, RISCV::SLT};
+ else
+ Opcodes = {RISCV::VCPOP_M, RISCV::SLT};
+ return (LT.first - 1) +
+ getRISCVInstructionCost(Opcodes, LT.second, CostKind);
+ }
// IR Reduction is composed by two vmv and one rvv reduction instruction.
- InstructionCost BaseCost = 2;
-
- if (CostKind == TTI::TCK_CodeSize)
- return (LT.first - 1) + BaseCost;
-
- unsigned VL = getEstimatedVLFor(Ty);
- return (LT.first - 1) + BaseCost + Log2_32_Ceil(VL);
+ unsigned SplitOp;
+ switch (IID) {
+ default:
+ llvm_unreachable("Unsupported intrinsic");
+ case Intrinsic::smax:
+ SplitOp = RISCV::VMAX_VV;
+ Opcodes = {RISCV::VMV_S_X, RISCV::VREDMAX_VS, RISCV::VMV_X_S};
+ break;
+ case Intrinsic::smin:
+ SplitOp = RISCV::VMIN_VV;
+ Opcodes = {RISCV::VMV_S_X, RISCV::VREDMIN_VS, RISCV::VMV_X_S};
+ break;
+ case Intrinsic::umax:
+ SplitOp = RISCV::VMAXU_VV;
+ Opcodes = {RISCV::VMV_S_X, RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
+ break;
+ case Intrinsic::umin:
+ SplitOp = RISCV::VMINU_VV;
+ Opcodes = {RISCV::VMV_S_X, RISCV::VREDMINU_VS, RISCV::VMV_X_S};
+ break;
+ case Intrinsic::maxnum:
+ SplitOp = RISCV::VFMAX_VV;
+ Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
+ break;
+ case Intrinsic::minnum:
+ SplitOp = RISCV::VFMIN_VV;
+ Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
+ break;
+ }
+ // Add a cost for data larger than LMUL8
+ InstructionCost SplitCost =
+ (LT.first > 1) ? (LT.first - 1) *
+ getRISCVInstructionCost(SplitOp, LT.second, CostKind)
+ : 0;
+ return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
}
InstructionCost
@@ -1392,20 +1431,50 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
- if (Ty->getElementType()->isIntegerTy(1))
+ std::array<unsigned, 3> Opcodes;
+ if (Ty->getElementType()->isIntegerTy(1)) {
// vcpop sequences, see vreduction-mask.ll
- return (LT.first - 1) + (ISD == ISD::AND ? 3 : 2);
+ if (ISD == ISD::AND)
+ Opcodes = {RISCV::VMNAND_MM, RISCV::VCPOP_M, RISCV::SLT};
+ else
+ Opcodes = {RISCV::VCPOP_M, RISCV::SLT};
+ return (LT.first - 1) +
+ getRISCVInstructionCost(Opcodes, LT.second, CostKind);
+ }
// IR Reduction is composed by two vmv and one rvv reduction instruction.
- InstructionCost BaseCost = 2;
-
- if (CostKind == TTI::TCK_CodeSize)
- return (LT.first - 1) + BaseCost;
-
- unsigned VL = getEstimatedVLFor(Ty);
- if (TTI::requiresOrderedReduction(FMF))
- return (LT.first - 1) + BaseCost + VL;
- return (LT.first - 1) + BaseCost + Log2_32_Ceil(VL);
+ unsigned SplitOp;
+ switch (ISD) {
+ case ISD::ADD:
+ SplitOp = RISCV::VADD_VV;
+ Opcodes = {RISCV::VMV_S_X, RISCV::VREDSUM_VS, RISCV::VMV_X_S};
+ break;
+ case ISD::OR:
+ SplitOp = RISCV::VOR_VV;
+ Opcodes = {RISCV::VMV_S_X, RISCV::VREDOR_VS, RISCV::VMV_X_S};
+ break;
+ case ISD::XOR:
+ SplitOp = RISCV::VXOR_VV;
+ Opcodes = {RISCV::VMV_S_X, RISCV::VREDXOR_VS, RISCV::VMV_X_S};
+ break;
+ case ISD::AND:
+ SplitOp = RISCV::VAND_VV;
+ Opcodes = {RISCV::VMV_S_X, RISCV::VREDAND_VS, RISCV::VMV_X_S};
+ break;
+ case ISD::FADD:
+ SplitOp = RISCV::VFADD_VV;
+ if (TTI::requiresOrderedReduction(FMF))
+ Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDOSUM_VS, RISCV::VFMV_F_S};
+ else
+ Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDUSUM_VS, RISCV::VFMV_F_S};
+ break;
+ }
+ // Add a cost for data larger than LMUL8
+ InstructionCost SplitCost =
+ (LT.first > 1) ? (LT.first - 1) *
+ getRISCVInstructionCost(SplitOp, LT.second, CostKind)
+ : 0;
+ return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
}
InstructionCost RISCVTTIImpl::getExtendedReductionCost(
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
index 6fe098628ea078..ed9d71cad0be61 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
@@ -6,25 +6,25 @@
define i32 @reduce_i1(i32 %arg) {
; CHECK-LABEL: 'reduce_i1'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.add.v2i1(<2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.add.v8i1(<8 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.add.v2i1(<2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.add.v8i1(<8 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i1'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.add.v2i1(<2 x i1> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.add.v8i1(<8 x i1> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.add.v2i1(<2 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.add.v8i1(<8 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)
@@ -51,14 +51,14 @@ define i32 @reduce_i8(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i8'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
@@ -85,14 +85,14 @@ define i32 @reduce_i16(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i16'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
@@ -115,18 +115,18 @@ define i32 @reduce_i32(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i32'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for i...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/77342
More information about the llvm-commits
mailing list