[llvm] [ARM] Add scalar add_sat costs. (PR #100988)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 29 01:48:12 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
@llvm/pr-subscribers-llvm-analysis
Author: David Green (davemgreen)
<details>
<summary>Changes</summary>
These can usually generate:
- qadd / qsub for signed i32 scalars
- uqadd16 / qadd16 / uqsub16 / qsub16 with an extend for signed/unsigned i8/i16
- Are expanded to an add + cmp + sel otherwise
This can lead to differences in unrolling etc, but should be a better cost for the instructions.
---
Patch is 91.89 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/100988.diff
4 Files Affected:
- (modified) llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp (+26-5)
- (modified) llvm/test/Analysis/CostModel/ARM/arith-ssat.ll (+48-48)
- (modified) llvm/test/Analysis/CostModel/ARM/arith-usat.ll (+48-48)
- (modified) llvm/test/Transforms/LoopVectorize/ARM/mve-saddsatcost.ll (+11-11)
``````````diff
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index e940dce7faa14..14020c830e7bc 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1888,7 +1888,8 @@ ARMTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
InstructionCost
ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) {
- switch (ICA.getID()) {
+ unsigned Opc = ICA.getID();
+ switch (Opc) {
case Intrinsic::get_active_lane_mask:
// Currently we make a somewhat optimistic assumption that
// active_lane_mask's are always free. In reality it may be freely folded
@@ -1904,17 +1905,37 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
case Intrinsic::ssub_sat:
case Intrinsic::uadd_sat:
case Intrinsic::usub_sat: {
+ bool IsAdd = (Opc == Intrinsic::sadd_sat || Opc == Intrinsic::ssub_sat);
+ bool IsSigned = (Opc == Intrinsic::sadd_sat || Opc == Intrinsic::ssub_sat);
+ Type *RetTy = ICA.getReturnType();
+
+ if (RetTy->isIntegerTy()) {
+ if (IsSigned && ST->hasDSP() && RetTy->isIntegerTy(32))
+ return 1; // qadd / qsub
+ if (ST->hasDSP() && (RetTy->isIntegerTy(8) || RetTy->isIntegerTy(16)))
+ return 2; // uqadd16 / qadd16 / uqsub16 / qsub16 + possible extend.
+ // Otherwise return the cost of expanding the node. add +
+ CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
+ Type *CondTy = RetTy->getWithNewBitWidth(1);
+ return getArithmeticInstrCost(IsAdd ? Instruction::Add : Instruction::Sub,
+ RetTy, CostKind) +
+ 2 * getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, Pred,
+ CostKind) +
+ 2 * getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, Pred,
+ CostKind);
+ }
+
if (!ST->hasMVEIntegerOps())
break;
- Type *VT = ICA.getReturnType();
- std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(VT);
+ std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
LT.second == MVT::v16i8) {
// This is a base cost of 1 for the vqadd, plus 3 extract shifts if we
// need to extend the type, as it uses shr(qadd(shl, shl)).
unsigned Instrs =
- LT.second.getScalarSizeInBits() == VT->getScalarSizeInBits() ? 1 : 4;
+ LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1
+ : 4;
return LT.first * ST->getMVEVectorCostFactor(CostKind) * Instrs;
}
break;
@@ -1948,7 +1969,7 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
case Intrinsic::fptoui_sat: {
if (ICA.getArgTypes().empty())
break;
- bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
+ bool IsSigned = Opc == Intrinsic::fptosi_sat;
auto LT = getTypeLegalizationCost(ICA.getArgTypes()[0]);
EVT MTy = TLI->getValueType(DL, ICA.getReturnType());
// Check for the legal types, with the corect subtarget features.
diff --git a/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll b/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll
index ba1d163882bcb..40091c3f742f2 100644
--- a/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll
+++ b/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll
@@ -36,22 +36,22 @@ declare <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8>, <64 x i8>)
define i32 @add(i32 %arg) {
; V8M-RECIP-LABEL: 'add'
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
+; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
+; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
+; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
+; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
@@ -61,22 +61,22 @@ define i32 @add(i32 %arg) {
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; NEON-RECIP-LABEL: 'add'
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
+; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
+; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
+; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
+; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
@@ -86,22 +86,22 @@ define i32 @add(i32 %arg) {
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; MVE-RECIP-LABEL: 'add'
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
+; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 166 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 594 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
+; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
+; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
+; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
@@ -111,22 +111,22 @@ define i32 @add(i32 %arg) {
; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; V8M-SIZE-LABEL: 'add'
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
+; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
+; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
+; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
+; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
@@ -136,22 +136,22 @@ define i32 @add(i32 %arg) {
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; NEON-SIZE-LABEL: 'add'
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
+; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
+; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
+; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/100988
More information about the llvm-commits
mailing list