[llvm] [AArch64][CostModel] Alter sdiv/srem cost where the divisor is constant (PR #123552)
Sushant Gokhale via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 19 23:06:56 PST 2025
https://github.com/sushgokh created https://github.com/llvm/llvm-project/pull/123552
This patch draws its inspiration from the udiv/urem patch #122236
For sdiv, typical sequence of instructions as per the type and divisor property is as follows:
```
Scalar power-of-2: cmp + csel + asr
Vector power-of-2(Neon): usra + sshr
Scalar non-power-2: smulh/smull + asr/lsr + add/sub + asr + add
Vector non-power-2(Neon):
a) <2 x i64>: 2 * (smulh + asr + add) --> This yeilds scalarized form.
b) <4 x i32>: smull2 + smull + uzp2 + add + sshr + usra
```
SVE versions should have more or less the same cost because sometimes they yeild native sdiv instructions, which should have less cost or the same sequence of neon instructions.
For srem, typical sequence of instructions as per the type and divisor property is as follows:
```
Scalar version: <set of sdiv instructions> + msub
Vector version: <set of sdiv instructions> + 2-msub/1-mls
```
>From 4e2c6bdc72970dcebd7ba4b2c32265fb9abcb3b8 Mon Sep 17 00:00:00 2001
From: sgokhale <sgokhale at nvidia.com>
Date: Mon, 20 Jan 2025 12:28:59 +0530
Subject: [PATCH] [AArch64][CostModel] Alter sdiv/srem cost where the divisor
is constant This patch draws its inspiration from the udiv/urem patch #122236
For sdiv, typical sequence of instructions as per the type and divisor property is as follows:
Scalar power-of-2: cmp + csel + asr
Neon power-of-2: usra + sshr
Scalar non-power-2: smulh/smull + asr/lsr + add/sub + asr + add
Vector non-power-2:
a) <2 x i64>: 2 * (smulh + asr + add) . This yeilds scalarized form.
b) <4 x i32>: smull2 + smull + uzp2 + add + sshr + usra
SVE versions should have more or less the same cost because sometimes they yeild native sdiv instructions, which should have less cost or the same sequence of neon instructions.
For srem, typical sequence of instructions as per the type and divisor property is as follows:
Scalar version: <set of sdiv instructions> + msub
Vector version: <set of sdiv instructions> + 2-msub/1-mls
---
.../AArch64/AArch64TargetTransformInfo.cpp | 62 +++-
llvm/test/Analysis/CostModel/AArch64/div.ll | 276 +++++++--------
.../Analysis/CostModel/AArch64/div_cte.ll | 6 +-
llvm/test/Analysis/CostModel/AArch64/rem.ll | 314 +++++++++---------
.../Analysis/CostModel/AArch64/sve-div.ll | 96 +++---
.../Analysis/CostModel/AArch64/sve-rem.ll | 276 +++++++--------
.../CostModel/AArch64/sve-remainder.ll | 24 +-
.../LoopVectorize/AArch64/blend-costs.ll | 230 ++++++++++++-
.../Transforms/SLPVectorizer/AArch64/div.ll | 86 +----
9 files changed, 783 insertions(+), 587 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 932a6f9ce23fd2..9815cadb756b52 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -3491,23 +3491,53 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
default:
return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
Op2Info);
+ case ISD::SREM:
case ISD::SDIV:
- if (Op2Info.isConstant() && Op2Info.isUniform() && Op2Info.isPowerOf2()) {
- // On AArch64, scalar signed division by constants power-of-two are
- // normally expanded to the sequence ADD + CMP + SELECT + SRA.
- // The OperandValue properties many not be same as that of previous
- // operation; conservatively assume OP_None.
- InstructionCost Cost = getArithmeticInstrCost(
- Instruction::Add, Ty, CostKind,
- Op1Info.getNoProps(), Op2Info.getNoProps());
- Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind,
- Op1Info.getNoProps(), Op2Info.getNoProps());
- Cost += getArithmeticInstrCost(
- Instruction::Select, Ty, CostKind,
- Op1Info.getNoProps(), Op2Info.getNoProps());
- Cost += getArithmeticInstrCost(Instruction::AShr, Ty, CostKind,
- Op1Info.getNoProps(), Op2Info.getNoProps());
- return Cost;
+ /*
+ For sdiv, typical sequence of instructions as per the type and divisor
+ property is as follows:
+ Scalar power-of-2: cmp + csel + asr
+ Vector power-of-2: usra + sshr
+
+ Scalar non-power-2: smulh/smull + asr/lsr + add/sub + asr + add
+ Vector non-power-2:
+ a) <2 x i64>: 2 * (smulh + asr + add) --> This yeilds scalarized form.
+ b) <4 x i32>: smull2 + smull + uzp2 + add + sshr + usra
+
+ SVE versions should have more or less the same cost because sometimes they
+ yeild native sdiv instructions, which should have less cost or the same
+ sequence of neon instructions.
+
+ For srem, typical sequence of instructions as per the type and divisor
+ property is as follows:
+ Scalar version: <set of sdiv instructions> + msub
+ Vector version: <set of sdiv instructions> + 2-msub/mls
+ */
+ if (Op2Info.isConstant()) {
+ InstructionCost AsrCost =
+ getArithmeticInstrCost(Instruction::AShr, Ty, CostKind,
+ Op1Info.getNoProps(), Op2Info.getNoProps());
+ InstructionCost AddCost =
+ getArithmeticInstrCost(Instruction::Add, Ty, CostKind,
+ Op1Info.getNoProps(), Op2Info.getNoProps());
+ InstructionCost MulCost =
+ getArithmeticInstrCost(Instruction::Mul, Ty, CostKind,
+ Op1Info.getNoProps(), Op2Info.getNoProps());
+
+ bool HasSMUL = !Op2Info.isPowerOf2();
+ unsigned NumOfSMUL = HasSMUL ? (LT.second.isVector() ? 2 : 1) : 0;
+ bool HasExtraAsr =
+ (LT.second.isVector() || LT.second == MVT::i32) && HasSMUL;
+
+ InstructionCost CommonCost = AsrCost + AddCost;
+ // We typicall get 1 msub for scalar and 2-msub/1-mls for the vector form.
+ // Typically, the cost of msub is same and mls is twice as costly as
+ // add/sub/mul.
+ InstructionCost MlsOrMSubCost = (LT.second.isVector() ? 2 : 1) * MulCost;
+ InstructionCost DivCost =
+ CommonCost + (MulCost * NumOfSMUL) /* SMULH/SMULH */ +
+ (AsrCost * HasExtraAsr); // Coming with second SMULH
+ return DivCost + (ISD == ISD::SREM ? MlsOrMSubCost : 0);
}
[[fallthrough]];
case ISD::UDIV: {
diff --git a/llvm/test/Analysis/CostModel/AArch64/div.ll b/llvm/test/Analysis/CostModel/AArch64/div.ll
index ef52d0db01eefd..0881aa39810408 100644
--- a/llvm/test/Analysis/CostModel/AArch64/div.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/div.ll
@@ -121,29 +121,29 @@ define i32 @udiv() {
define i32 @sdiv_const() {
; CHECK-LABEL: 'sdiv_const'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I128 = sdiv i128 undef, 7
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = sdiv i64 undef, 7
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 6, i64 7>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2i32 = sdiv <2 x i32> undef, <i32 4, i32 5>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2i16 = sdiv <2 x i16> undef, <i16 4, i16 5>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i16 = sdiv <4 x i16> undef, <i16 4, i16 5, i16 6, i16 7>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2i8 = sdiv <2 x i8> undef, <i8 4, i8 5>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i8 = sdiv <4 x i8> undef, <i8 4, i8 5, i8 6, i8 7>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i8 = sdiv <8 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 336 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I128 = sdiv i128 undef, 7
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = sdiv i64 undef, 7
+; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 6, i64 7>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 7
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i32 = sdiv <2 x i32> undef, <i32 4, i32 5>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 7
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i16 = sdiv <2 x i16> undef, <i16 4, i16 5>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i16 = sdiv <4 x i16> undef, <i16 4, i16 5, i16 6, i16 7>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 7
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i8 = sdiv <2 x i8> undef, <i8 4, i8 5>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i8 = sdiv <4 x i8> undef, <i8 4, i8 5, i8 6, i8 7>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i8 = sdiv <8 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I128 = sdiv i128 undef, 7
@@ -238,29 +238,29 @@ define i32 @udiv_const() {
define i32 @sdiv_uniformconst() {
; CHECK-LABEL: 'sdiv_uniformconst'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I128 = sdiv i128 undef, 7
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = sdiv i64 undef, 7
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i16 = sdiv <2 x i16> undef, splat (i16 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i16 = sdiv <4 x i16> undef, splat (i16 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i8 = sdiv <2 x i8> undef, splat (i8 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I128 = sdiv i128 undef, 7
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = sdiv i64 undef, 7
+; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 7
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 7
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i16 = sdiv <2 x i16> undef, splat (i16 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i16 = sdiv <4 x i16> undef, splat (i16 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 7
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i8 = sdiv <2 x i8> undef, splat (i8 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I128 = sdiv i128 undef, 7
@@ -354,29 +354,29 @@ define i32 @udiv_uniformconst() {
define i32 @sdiv_constpow2() {
; CHECK-LABEL: 'sdiv_constpow2'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I128 = sdiv i128 undef, 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = sdiv i64 undef, 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 8, i64 16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = sdiv i32 undef, 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2i32 = sdiv <2 x i32> undef, <i32 2, i32 4>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = sdiv i16 undef, 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2i16 = sdiv <2 x i16> undef, <i16 2, i16 4>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i16 = sdiv <4 x i16> undef, <i16 2, i16 4, i16 8, i16 16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = sdiv i8 undef, 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2i8 = sdiv <2 x i8> undef, <i8 2, i8 4>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i8 = sdiv <4 x i8> undef, <i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i8 = sdiv <8 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 336 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I128 = sdiv i128 undef, 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = sdiv i64 undef, 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 8, i64 16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = sdiv i32 undef, 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sdiv <2 x i32> undef, <i32 2, i32 4>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = sdiv i16 undef, 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sdiv <2 x i16> undef, <i16 2, i16 4>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sdiv <4 x i16> undef, <i16 2, i16 4, i16 8, i16 16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = sdiv i8 undef, 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = sdiv <2 x i8> undef, <i8 2, i8 4>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = sdiv <4 x i8> undef, <i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = sdiv <8 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I128 = sdiv i128 undef, 16
@@ -470,29 +470,29 @@ define i32 @udiv_constpow2() {
define i32 @sdiv_uniformconstpow2() {
; CHECK-LABEL: 'sdiv_uniformconstpow2'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I128 = sdiv i128 undef, 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = sdiv i64 undef, 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = sdiv i32 undef, 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = sdiv i16 undef, 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i16 = sdiv <2 x i16> undef, splat (i16 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4i16 = sdiv <4 x i16> undef, splat (i16 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = sdiv i8 undef, 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i8 = sdiv <2 x i8> undef, splat (i8 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I128 = sdiv i128 undef, 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = sdiv i64 undef, 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = sdiv i32 undef, 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = sdiv i16 undef, 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sdiv <2 x i16> undef, splat (i16 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sdiv <4 x i16> undef, splat (i16 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = sdiv i8 undef, 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = sdiv <2 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I128 = sdiv i128 undef, 16
@@ -586,29 +586,29 @@ define i32 @udiv_uniformconstpow2() {
define i32 @sdiv_constnegpow2() {
; CHECK-LABEL: 'sdiv_constnegpow2'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I128 = sdiv i128 undef, -16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = sdiv i64 undef, -16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 -8, i64 -16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2i32 = sdiv <2 x i32> undef, <i32 -2, i32 -4>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2i16 = sdiv <2 x i16> undef, <i16 -2, i16 -4>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i16 = sdiv <4 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2i8 = sdiv <2 x i8> undef, <i8 -2, i8 -4>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i8 = sdiv <4 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i8 = sdiv <8 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 336 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I128 = sdiv i128 undef, -16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = sdiv i64 undef, -16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 -8, i64 -16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, -16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i32 = sdiv <2 x i32> undef, <i32 -2, i32 -4>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, -16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i16 = sdiv <2 x i16> undef, <i16 -2, i16 -4>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i16 = sdiv <4 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, -16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i8 = sdiv <2 x i8> undef, <i8 -2, i8 -4>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i8 = sdiv <4 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i8 = sdiv <8 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I128 = sdiv i128 undef, -16
@@ -702,29 +702,29 @@ define i32 @udiv_constnegpow2() {
define i32 @sdiv_uniformconstnegpow2() {
; CHECK-LABEL: 'sdiv_uniformconstnegpow2'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I128 = sdiv i128 undef, -16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = sdiv i64 undef, -16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i16 = sdiv <2 x i16> undef, splat (i16 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i16 = sdiv <4 x i16> undef, splat (i16 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i8 = sdiv <2 x i8> undef, splat (i8 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I128 = sdiv i128 undef, -16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = sdiv i64 undef, -16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, -16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, -16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i16 = sdiv <2 x i16> undef, splat (i16 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i16 = sdiv <4 x i16> undef, splat (i16 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, -16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i8 = sdiv <2 x i8> undef, splat (i8 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I128 = sdiv i128 undef, -16
diff --git a/llvm/test/Analysis/CostModel/AArch64/div_cte.ll b/llvm/test/Analysis/CostModel/AArch64/div_cte.ll
index dd93aed53c0f2a..9f9588d4387b15 100644
--- a/llvm/test/Analysis/CostModel/AArch64/div_cte.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/div_cte.ll
@@ -7,7 +7,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
define <16 x i8> @sdiv8xi16(<16 x i8> %x) {
; CHECK-LABEL: 'sdiv8xi16'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <16 x i8> %x, splat (i8 9)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %div = sdiv <16 x i8> %x, splat (i8 9)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %div
;
%div = sdiv <16 x i8> %x, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
@@ -16,7 +16,7 @@ define <16 x i8> @sdiv8xi16(<16 x i8> %x) {
define <8 x i16> @sdiv16xi8(<8 x i16> %x) {
; CHECK-LABEL: 'sdiv16xi8'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <8 x i16> %x, splat (i16 9)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %div = sdiv <8 x i16> %x, splat (i16 9)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %div
;
%div = sdiv <8 x i16> %x, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
@@ -25,7 +25,7 @@ define <8 x i16> @sdiv16xi8(<8 x i16> %x) {
define <4 x i32> @sdiv32xi4(<4 x i32> %x) {
; CHECK-LABEL: 'sdiv32xi4'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <4 x i32> %x, splat (i32 9)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %div = sdiv <4 x i32> %x, splat (i32 9)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %div
;
%div = sdiv <4 x i32> %x, <i32 9, i32 9, i32 9, i32 9>
diff --git a/llvm/test/Analysis/CostModel/AArch64/rem.ll b/llvm/test/Analysis/CostModel/AArch64/rem.ll
index 06c05aefedf2be..e9155fb42e2ec8 100644
--- a/llvm/test/Analysis/CostModel/AArch64/rem.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/rem.ll
@@ -5,29 +5,29 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
define i32 @srem() {
; CHECK-LABEL: 'srem'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %I128 = srem i128 undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I128 = srem i128 undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i64 = srem <2 x i64> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i64 = srem <4 x i64> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i64 = srem <8 x i64> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V2i64 = srem <2 x i64> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V4i64 = srem <4 x i64> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8i64 = srem <8 x i64> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i32 = srem <2 x i32> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V2i32 = srem <2 x i32> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4i32 = srem <4 x i32> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8i32 = srem <8 x i32> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V16i32 = srem <16 x i32> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i16 = srem <2 x i16> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i16 = srem <4 x i16> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V2i16 = srem <2 x i16> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4i16 = srem <4 x i16> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V8i16 = srem <8 x i16> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V16i16 = srem <16 x i16> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V32i16 = srem <32 x i16> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i8 = srem <2 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i8 = srem <4 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i8 = srem <8 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = srem <16 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = srem <32 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = srem <64 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V2i8 = srem <2 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4i8 = srem <4 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V8i8 = srem <8 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 232 for instruction: %V16i8 = srem <16 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 464 for instruction: %V32i8 = srem <32 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 928 for instruction: %V64i8 = srem <64 x i8> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I128 = srem i128 undef, undef
@@ -121,29 +121,29 @@ define i32 @urem() {
define i32 @srem_const() {
; CHECK-LABEL: 'srem_const'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %I128 = srem i128 undef, 7
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = srem i64 undef, 7
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i64 = srem <2 x i64> undef, <i64 6, i64 7>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i64 = srem <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i64 = srem <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i32 = srem <2 x i32> undef, <i32 4, i32 5>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, 7
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i16 = srem <2 x i16> undef, <i16 4, i16 5>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i16 = srem <4 x i16> undef, <i16 4, i16 5, i16 6, i16 7>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, 7
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i8 = srem <2 x i8> undef, <i8 4, i8 5>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i8 = srem <4 x i8> undef, <i8 4, i8 5, i8 6, i8 7>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i8 = srem <8 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = srem <16 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = srem <32 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = srem <64 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I128 = srem i128 undef, 7
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7
+; CHECK-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V2i64 = srem <2 x i64> undef, <i64 6, i64 7>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V4i64 = srem <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %V8i64 = srem <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = srem i32 undef, 7
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = srem <2 x i32> undef, <i32 4, i32 5>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = srem <8 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = srem <16 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = srem i16 undef, 7
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = srem <2 x i16> undef, <i16 4, i16 5>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i16 = srem <4 x i16> undef, <i16 4, i16 5, i16 6, i16 7>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = srem <8 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = srem <16 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = srem <32 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, 7
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i8 = srem <2 x i8> undef, <i8 4, i8 5>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i8 = srem <4 x i8> undef, <i8 4, i8 5, i8 6, i8 7>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i8 = srem <8 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = srem <16 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = srem <32 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = srem <64 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I128 = srem i128 undef, 7
@@ -238,29 +238,29 @@ define i32 @urem_const() {
define i32 @srem_uniformconst() {
; CHECK-LABEL: 'srem_uniformconst'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %I128 = srem i128 undef, 7
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = srem i64 undef, 7
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i32 = srem <2 x i32> undef, splat (i32 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, 7
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i16 = srem <2 x i16> undef, splat (i16 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i16 = srem <4 x i16> undef, splat (i16 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, 7
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i8 = srem <2 x i8> undef, splat (i8 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i8 = srem <4 x i8> undef, splat (i8 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i8 = srem <8 x i8> undef, splat (i8 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I128 = srem i128 undef, 7
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7
+; CHECK-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = srem i32 undef, 7
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = srem <2 x i32> undef, splat (i32 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = srem i16 undef, 7
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = srem <2 x i16> undef, splat (i16 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i16 = srem <4 x i16> undef, splat (i16 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, 7
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i8 = srem <2 x i8> undef, splat (i8 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i8 = srem <4 x i8> undef, splat (i8 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i8 = srem <8 x i8> undef, splat (i8 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I128 = srem i128 undef, 7
@@ -354,29 +354,29 @@ define i32 @urem_uniformconst() {
define i32 @srem_constpow2() {
; CHECK-LABEL: 'srem_constpow2'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %I128 = srem i128 undef, 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i64 = srem <2 x i64> undef, <i64 8, i64 16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i64 = srem <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i64 = srem <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = srem i32 undef, 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i32 = srem <2 x i32> undef, <i32 2, i32 4>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i16 = srem <2 x i16> undef, <i16 2, i16 4>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i16 = srem <4 x i16> undef, <i16 2, i16 4, i16 8, i16 16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = srem i8 undef, 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i8 = srem <2 x i8> undef, <i8 2, i8 4>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i8 = srem <4 x i8> undef, <i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i8 = srem <8 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = srem <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = srem <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = srem <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I128 = srem i128 undef, 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V2i64 = srem <2 x i64> undef, <i64 8, i64 16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V4i64 = srem <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V8i64 = srem <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i32 = srem <2 x i32> undef, <i32 2, i32 4>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = srem <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i16 = srem <2 x i16> undef, <i16 2, i16 4>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i16 = srem <4 x i16> undef, <i16 2, i16 4, i16 8, i16 16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i8 = srem <2 x i8> undef, <i8 2, i8 4>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i8 = srem <4 x i8> undef, <i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i8 = srem <8 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = srem <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I128 = srem i128 undef, 16
@@ -470,29 +470,29 @@ define i32 @urem_constpow2() {
define i32 @srem_uniformconstpow2() {
; CHECK-LABEL: 'srem_uniformconstpow2'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %I128 = srem i128 undef, 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = srem i32 undef, 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2i32 = srem <2 x i32> undef, splat (i32 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2i16 = srem <2 x i16> undef, splat (i16 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4i16 = srem <4 x i16> undef, splat (i16 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = srem i8 undef, 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2i8 = srem <2 x i8> undef, splat (i8 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4i8 = srem <4 x i8> undef, splat (i8 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i8 = srem <8 x i8> undef, splat (i8 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 704 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I128 = srem i128 undef, 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i32 = srem <2 x i32> undef, splat (i32 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i16 = srem <2 x i16> undef, splat (i16 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i16 = srem <4 x i16> undef, splat (i16 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i8 = srem <2 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i8 = srem <4 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i8 = srem <8 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I128 = srem i128 undef, 16
@@ -586,29 +586,29 @@ define i32 @urem_uniformconstpow2() {
define i32 @srem_constnegpow2() {
; CHECK-LABEL: 'srem_constnegpow2'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %I128 = srem i128 undef, -16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = srem i64 undef, -16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i64 = srem <2 x i64> undef, <i64 -8, i64 -16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i64 = srem <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i64 = srem <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i32 = srem <2 x i32> undef, <i32 -2, i32 -4>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, -16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i16 = srem <2 x i16> undef, <i16 -2, i16 -4>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i16 = srem <4 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, -16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i8 = srem <2 x i8> undef, <i8 -2, i8 -4>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i8 = srem <4 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i8 = srem <8 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = srem <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = srem <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = srem <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I128 = srem i128 undef, -16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V2i64 = srem <2 x i64> undef, <i64 -8, i64 -16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V4i64 = srem <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %V8i64 = srem <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = srem i32 undef, -16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = srem <2 x i32> undef, <i32 -2, i32 -4>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = srem <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = srem <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = srem i16 undef, -16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = srem <2 x i16> undef, <i16 -2, i16 -4>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i16 = srem <4 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = srem <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = srem <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = srem <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, -16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i8 = srem <2 x i8> undef, <i8 -2, i8 -4>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i8 = srem <4 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i8 = srem <8 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = srem <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = srem <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = srem <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I128 = srem i128 undef, -16
@@ -702,29 +702,29 @@ define i32 @urem_constnegpow2() {
define i32 @srem_uniformconstnegpow2() {
; CHECK-LABEL: 'srem_uniformconstnegpow2'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %I128 = srem i128 undef, -16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = srem i64 undef, -16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i32 = srem <2 x i32> undef, splat (i32 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, -16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i16 = srem <2 x i16> undef, splat (i16 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i16 = srem <4 x i16> undef, splat (i16 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, -16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i8 = srem <2 x i8> undef, splat (i8 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i8 = srem <4 x i8> undef, splat (i8 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i8 = srem <8 x i8> undef, splat (i8 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I128 = srem i128 undef, -16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = srem i32 undef, -16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = srem <2 x i32> undef, splat (i32 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = srem i16 undef, -16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = srem <2 x i16> undef, splat (i16 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i16 = srem <4 x i16> undef, splat (i16 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, -16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i8 = srem <2 x i8> undef, splat (i8 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i8 = srem <4 x i8> undef, splat (i8 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i8 = srem <8 x i8> undef, splat (i8 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I128 = srem i128 undef, -16
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-div.ll b/llvm/test/Analysis/CostModel/AArch64/sve-div.ll
index 4c25e3003177d9..a925148e1efe2b 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-div.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-div.ll
@@ -163,24 +163,24 @@ define void @udiv() {
define void @sdiv_uniformconst() {
; CHECK-LABEL: 'sdiv_uniformconst'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i16 = sdiv <2 x i16> undef, splat (i16 7)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i16 = sdiv <4 x i16> undef, splat (i16 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i8 = sdiv <2 x i8> undef, splat (i8 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NV2i64 = sdiv <vscale x 2 x i64> undef, splat (i64 7)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV4i64 = sdiv <vscale x 4 x i64> undef, splat (i64 7)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NV8i64 = sdiv <vscale x 8 x i64> undef, splat (i64 7)
@@ -321,24 +321,24 @@ define void @udiv_uniformconst() {
define void @sdiv_uniformconstpow2() {
; CHECK-LABEL: 'sdiv_uniformconstpow2'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i16 = sdiv <2 x i16> undef, splat (i16 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4i16 = sdiv <4 x i16> undef, splat (i16 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i8 = sdiv <2 x i8> undef, splat (i8 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sdiv <2 x i16> undef, splat (i16 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sdiv <4 x i16> undef, splat (i16 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = sdiv <2 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NV2i64 = sdiv <vscale x 2 x i64> undef, splat (i64 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV4i64 = sdiv <vscale x 4 x i64> undef, splat (i64 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NV8i64 = sdiv <vscale x 8 x i64> undef, splat (i64 16)
@@ -479,24 +479,24 @@ define void @udiv_uniformconstpow2() {
define void @sdiv_uniformconstnegpow2() {
; CHECK-LABEL: 'sdiv_uniformconstnegpow2'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i16 = sdiv <2 x i16> undef, splat (i16 -16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i16 = sdiv <4 x i16> undef, splat (i16 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i8 = sdiv <2 x i8> undef, splat (i8 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NV2i64 = sdiv <vscale x 2 x i64> undef, splat (i64 -16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV4i64 = sdiv <vscale x 4 x i64> undef, splat (i64 -16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NV8i64 = sdiv <vscale x 8 x i64> undef, splat (i64 -16)
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-rem.ll b/llvm/test/Analysis/CostModel/AArch64/sve-rem.ll
index 10455c06b3f456..327ee6840aad30 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-rem.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-rem.ll
@@ -5,42 +5,42 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
define void @srem() {
; CHECK-LABEL: 'srem'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = srem <4 x i64> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = srem <8 x i64> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = srem <2 x i32> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = srem <16 x i32> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = srem <2 x i16> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i16 = srem <4 x i16> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i16 = srem <16 x i16> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = srem <32 x i16> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i8 = srem <2 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4i8 = srem <4 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i8 = srem <8 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i8 = srem <16 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = srem <32 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = srem <64 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV2i64 = srem <vscale x 2 x i64> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NV4i64 = srem <vscale x 4 x i64> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NV8i64 = srem <vscale x 8 x i64> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV2i32 = srem <vscale x 2 x i32> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV4i32 = srem <vscale x 4 x i32> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NV8i32 = srem <vscale x 8 x i32> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NV16i32 = srem <vscale x 16 x i32> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV2i16 = srem <vscale x 2 x i16> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV4i16 = srem <vscale x 4 x i16> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NV8i16 = srem <vscale x 8 x i16> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NV16i16 = srem <vscale x 16 x i16> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %NV32i16 = srem <vscale x 32 x i16> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV2i8 = srem <vscale x 2 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV4i8 = srem <vscale x 4 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NV8i8 = srem <vscale x 8 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NV16i8 = srem <vscale x 16 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NV32i8 = srem <vscale x 32 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %NV64i8 = srem <vscale x 64 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2i64 = srem <2 x i64> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4i64 = srem <4 x i64> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i64 = srem <8 x i64> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i32 = srem <2 x i32> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = srem <4 x i32> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = srem <8 x i32> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i32 = srem <16 x i32> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2i16 = srem <2 x i16> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4i16 = srem <4 x i16> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8i16 = srem <8 x i16> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16i16 = srem <16 x i16> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32i16 = srem <32 x i16> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2i8 = srem <2 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i8 = srem <4 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8i8 = srem <8 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16i8 = srem <16 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V32i8 = srem <32 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V64i8 = srem <64 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %NV2i64 = srem <vscale x 2 x i64> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %NV4i64 = srem <vscale x 4 x i64> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %NV8i64 = srem <vscale x 8 x i64> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %NV2i32 = srem <vscale x 2 x i32> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %NV4i32 = srem <vscale x 4 x i32> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %NV8i32 = srem <vscale x 8 x i32> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %NV16i32 = srem <vscale x 16 x i32> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %NV2i16 = srem <vscale x 2 x i16> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %NV4i16 = srem <vscale x 4 x i16> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %NV8i16 = srem <vscale x 8 x i16> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %NV16i16 = srem <vscale x 16 x i16> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %NV32i16 = srem <vscale x 32 x i16> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %NV2i8 = srem <vscale x 2 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %NV4i8 = srem <vscale x 4 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %NV8i8 = srem <vscale x 8 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %NV16i8 = srem <vscale x 16 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %NV32i8 = srem <vscale x 32 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %NV64i8 = srem <vscale x 64 x i8> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%V2i64 = srem <2 x i64> undef, undef
@@ -163,42 +163,42 @@ define void @urem() {
define void @srem_uniformconst() {
; CHECK-LABEL: 'srem_uniformconst'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = srem <2 x i32> undef, splat (i32 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = srem <2 x i32> undef, splat (i32 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = srem <2 x i16> undef, splat (i16 7)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i16 = srem <4 x i16> undef, splat (i16 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i8 = srem <2 x i8> undef, splat (i8 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4i8 = srem <4 x i8> undef, splat (i8 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i8 = srem <8 x i8> undef, splat (i8 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV2i64 = srem <vscale x 2 x i64> undef, splat (i64 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NV4i64 = srem <vscale x 4 x i64> undef, splat (i64 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NV8i64 = srem <vscale x 8 x i64> undef, splat (i64 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV2i32 = srem <vscale x 2 x i32> undef, splat (i32 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV4i32 = srem <vscale x 4 x i32> undef, splat (i32 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NV8i32 = srem <vscale x 8 x i32> undef, splat (i32 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NV16i32 = srem <vscale x 16 x i32> undef, splat (i32 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV2i16 = srem <vscale x 2 x i16> undef, splat (i16 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV4i16 = srem <vscale x 4 x i16> undef, splat (i16 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NV8i16 = srem <vscale x 8 x i16> undef, splat (i16 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NV16i16 = srem <vscale x 16 x i16> undef, splat (i16 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %NV32i16 = srem <vscale x 32 x i16> undef, splat (i16 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV2i8 = srem <vscale x 2 x i8> undef, splat (i8 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV4i8 = srem <vscale x 4 x i8> undef, splat (i8 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NV8i8 = srem <vscale x 8 x i8> undef, splat (i8 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NV16i8 = srem <vscale x 16 x i8> undef, splat (i8 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NV32i8 = srem <vscale x 32 x i8> undef, splat (i8 7)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %NV64i8 = srem <vscale x 64 x i8> undef, splat (i8 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i8 = srem <4 x i8> undef, splat (i8 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i8 = srem <8 x i8> undef, splat (i8 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %NV2i64 = srem <vscale x 2 x i64> undef, splat (i64 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %NV4i64 = srem <vscale x 4 x i64> undef, splat (i64 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %NV8i64 = srem <vscale x 8 x i64> undef, splat (i64 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %NV2i32 = srem <vscale x 2 x i32> undef, splat (i32 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %NV4i32 = srem <vscale x 4 x i32> undef, splat (i32 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %NV8i32 = srem <vscale x 8 x i32> undef, splat (i32 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %NV16i32 = srem <vscale x 16 x i32> undef, splat (i32 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %NV2i16 = srem <vscale x 2 x i16> undef, splat (i16 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %NV4i16 = srem <vscale x 4 x i16> undef, splat (i16 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %NV8i16 = srem <vscale x 8 x i16> undef, splat (i16 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %NV16i16 = srem <vscale x 16 x i16> undef, splat (i16 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %NV32i16 = srem <vscale x 32 x i16> undef, splat (i16 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %NV2i8 = srem <vscale x 2 x i8> undef, splat (i8 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %NV4i8 = srem <vscale x 4 x i8> undef, splat (i8 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %NV8i8 = srem <vscale x 8 x i8> undef, splat (i8 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %NV16i8 = srem <vscale x 16 x i8> undef, splat (i8 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %NV32i8 = srem <vscale x 32 x i8> undef, splat (i8 7)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %NV64i8 = srem <vscale x 64 x i8> undef, splat (i8 7)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%V2i64 = srem <2 x i64> undef, splat (i64 7)
@@ -321,42 +321,42 @@ define void @urem_uniformconst() {
define void @srem_uniformconstpow2() {
; CHECK-LABEL: 'srem_uniformconstpow2'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2i32 = srem <2 x i32> undef, splat (i32 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2i16 = srem <2 x i16> undef, splat (i16 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4i16 = srem <4 x i16> undef, splat (i16 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2i8 = srem <2 x i8> undef, splat (i8 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4i8 = srem <4 x i8> undef, splat (i8 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V8i8 = srem <8 x i8> undef, splat (i8 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 404 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV2i64 = srem <vscale x 2 x i64> undef, splat (i64 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NV4i64 = srem <vscale x 4 x i64> undef, splat (i64 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NV8i64 = srem <vscale x 8 x i64> undef, splat (i64 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV2i32 = srem <vscale x 2 x i32> undef, splat (i32 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV4i32 = srem <vscale x 4 x i32> undef, splat (i32 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NV8i32 = srem <vscale x 8 x i32> undef, splat (i32 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NV16i32 = srem <vscale x 16 x i32> undef, splat (i32 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV2i16 = srem <vscale x 2 x i16> undef, splat (i16 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV4i16 = srem <vscale x 4 x i16> undef, splat (i16 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NV8i16 = srem <vscale x 8 x i16> undef, splat (i16 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NV16i16 = srem <vscale x 16 x i16> undef, splat (i16 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %NV32i16 = srem <vscale x 32 x i16> undef, splat (i16 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV2i8 = srem <vscale x 2 x i8> undef, splat (i8 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV4i8 = srem <vscale x 4 x i8> undef, splat (i8 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NV8i8 = srem <vscale x 8 x i8> undef, splat (i8 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NV16i8 = srem <vscale x 16 x i8> undef, splat (i8 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NV32i8 = srem <vscale x 32 x i8> undef, splat (i8 16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %NV64i8 = srem <vscale x 64 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i32 = srem <2 x i32> undef, splat (i32 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i16 = srem <2 x i16> undef, splat (i16 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i16 = srem <4 x i16> undef, splat (i16 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i8 = srem <2 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i8 = srem <4 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i8 = srem <8 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %NV2i64 = srem <vscale x 2 x i64> undef, splat (i64 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %NV4i64 = srem <vscale x 4 x i64> undef, splat (i64 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %NV8i64 = srem <vscale x 8 x i64> undef, splat (i64 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %NV2i32 = srem <vscale x 2 x i32> undef, splat (i32 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %NV4i32 = srem <vscale x 4 x i32> undef, splat (i32 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %NV8i32 = srem <vscale x 8 x i32> undef, splat (i32 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %NV16i32 = srem <vscale x 16 x i32> undef, splat (i32 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %NV2i16 = srem <vscale x 2 x i16> undef, splat (i16 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %NV4i16 = srem <vscale x 4 x i16> undef, splat (i16 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %NV8i16 = srem <vscale x 8 x i16> undef, splat (i16 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %NV16i16 = srem <vscale x 16 x i16> undef, splat (i16 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %NV32i16 = srem <vscale x 32 x i16> undef, splat (i16 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %NV2i8 = srem <vscale x 2 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %NV4i8 = srem <vscale x 4 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %NV8i8 = srem <vscale x 8 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %NV16i8 = srem <vscale x 16 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %NV32i8 = srem <vscale x 32 x i8> undef, splat (i8 16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %NV64i8 = srem <vscale x 64 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%V2i64 = srem <2 x i64> undef, splat (i64 16)
@@ -479,42 +479,42 @@ define void @urem_uniformconstpow2() {
define void @srem_uniformconstnegpow2() {
; CHECK-LABEL: 'srem_uniformconstnegpow2'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = srem <2 x i32> undef, splat (i32 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = srem <2 x i32> undef, splat (i32 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = srem <2 x i16> undef, splat (i16 -16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i16 = srem <4 x i16> undef, splat (i16 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i8 = srem <2 x i8> undef, splat (i8 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4i8 = srem <4 x i8> undef, splat (i8 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i8 = srem <8 x i8> undef, splat (i8 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV2i64 = srem <vscale x 2 x i64> undef, splat (i64 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NV4i64 = srem <vscale x 4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NV8i64 = srem <vscale x 8 x i64> undef, splat (i64 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV2i32 = srem <vscale x 2 x i32> undef, splat (i32 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV4i32 = srem <vscale x 4 x i32> undef, splat (i32 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NV8i32 = srem <vscale x 8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NV16i32 = srem <vscale x 16 x i32> undef, splat (i32 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV2i16 = srem <vscale x 2 x i16> undef, splat (i16 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV4i16 = srem <vscale x 4 x i16> undef, splat (i16 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NV8i16 = srem <vscale x 8 x i16> undef, splat (i16 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NV16i16 = srem <vscale x 16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %NV32i16 = srem <vscale x 32 x i16> undef, splat (i16 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV2i8 = srem <vscale x 2 x i8> undef, splat (i8 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV4i8 = srem <vscale x 4 x i8> undef, splat (i8 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NV8i8 = srem <vscale x 8 x i8> undef, splat (i8 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NV16i8 = srem <vscale x 16 x i8> undef, splat (i8 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NV32i8 = srem <vscale x 32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %NV64i8 = srem <vscale x 64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i8 = srem <4 x i8> undef, splat (i8 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i8 = srem <8 x i8> undef, splat (i8 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %NV2i64 = srem <vscale x 2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %NV4i64 = srem <vscale x 4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %NV8i64 = srem <vscale x 8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %NV2i32 = srem <vscale x 2 x i32> undef, splat (i32 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %NV4i32 = srem <vscale x 4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %NV8i32 = srem <vscale x 8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %NV16i32 = srem <vscale x 16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %NV2i16 = srem <vscale x 2 x i16> undef, splat (i16 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %NV4i16 = srem <vscale x 4 x i16> undef, splat (i16 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %NV8i16 = srem <vscale x 8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %NV16i16 = srem <vscale x 16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %NV32i16 = srem <vscale x 32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %NV2i8 = srem <vscale x 2 x i8> undef, splat (i8 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %NV4i8 = srem <vscale x 4 x i8> undef, splat (i8 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %NV8i8 = srem <vscale x 8 x i8> undef, splat (i8 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %NV16i8 = srem <vscale x 16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %NV32i8 = srem <vscale x 32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %NV64i8 = srem <vscale x 64 x i8> undef, splat (i8 -16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%V2i64 = srem <2 x i64> undef, splat (i64 -16)
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-remainder.ll b/llvm/test/Analysis/CostModel/AArch64/sve-remainder.ll
index 711f06c473130a..97b7d7641fc43c 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-remainder.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-remainder.ll
@@ -9,26 +9,26 @@ define void @test_urem_srem_expand() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %legal_type_urem_1 = urem <vscale x 8 x i16> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %legal_type_urem_2 = urem <vscale x 4 x i32> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %legal_type_urem_3 = urem <vscale x 2 x i64> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %legal_type_srem_0 = srem <vscale x 16 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %legal_type_srem_1 = srem <vscale x 8 x i16> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %legal_type_srem_2 = srem <vscale x 4 x i32> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %legal_type_srem_3 = srem <vscale x 2 x i64> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %legal_type_srem_0 = srem <vscale x 16 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %legal_type_srem_1 = srem <vscale x 8 x i16> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %legal_type_srem_2 = srem <vscale x 4 x i32> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %legal_type_srem_3 = srem <vscale x 2 x i64> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %split_type_urem_0 = urem <vscale x 32 x i8> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %split_type_urem_1 = urem <vscale x 16 x i16> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %split_type_urem_2 = urem <vscale x 8 x i32> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %split_type_urem_3 = urem <vscale x 4 x i64> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %split_type_srem_0 = srem <vscale x 32 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %split_type_srem_1 = srem <vscale x 16 x i16> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %split_type_srem_2 = srem <vscale x 8 x i32> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %split_type_srem_3 = srem <vscale x 4 x i64> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %split_type_srem_0 = srem <vscale x 32 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %split_type_srem_1 = srem <vscale x 16 x i16> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %split_type_srem_2 = srem <vscale x 8 x i32> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %split_type_srem_3 = srem <vscale x 4 x i64> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %widen_type_urem_0 = urem <vscale x 31 x i8> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %widen_type_urem_1 = urem <vscale x 15 x i16> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %widen_type_urem_2 = urem <vscale x 7 x i32> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %widen_type_urem_3 = urem <vscale x 3 x i64> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %widen_type_srem_0 = srem <vscale x 31 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %widen_type_srem_1 = srem <vscale x 15 x i16> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %widen_type_srem_2 = srem <vscale x 7 x i32> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %widen_type_srem_3 = srem <vscale x 3 x i64> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %widen_type_srem_0 = srem <vscale x 31 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %widen_type_srem_1 = srem <vscale x 15 x i16> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %widen_type_srem_2 = srem <vscale x 7 x i32> undef, undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %widen_type_srem_3 = srem <vscale x 3 x i64> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
entry:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
index 254cdf2d14d9f2..e5aacd3312321e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
@@ -424,12 +424,229 @@ exit:
define void @test_blend_feeding_replicated_store_3(ptr noalias %src.1, ptr noalias %src.2, ptr noalias %dst, i32 %x, i64 %N, i1 %c.2) {
; CHECK-LABEL: define void @test_blend_feeding_replicated_store_3(
; CHECK-SAME: ptr noalias [[SRC_1:%.*]], ptr noalias [[SRC_2:%.*]], ptr noalias [[DST:%.*]], i32 [[X:%.*]], i64 [[N:%.*]], i1 [[C_2:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[ITER_CHECK:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
+; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
+; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 16
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i1> poison, i1 [[C_2]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i1> [[BROADCAST_SPLATINSERT]], <16 x i1> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = xor <16 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <16 x i32> poison, i32 [[X]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT4]], <16 x i32> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE37:.*]] ]
+; CHECK-NEXT: [[L_1:%.*]] = load i8, ptr [[SRC_1]], align 1
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i8> poison, i8 [[L_1]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT2]], <16 x i8> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[BROADCAST_SPLAT3]] to <16 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i32> [[BROADCAST_SPLAT5]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = sdiv <16 x i32> [[TMP4]], splat (i32 255)
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[SRC_2]], align 1
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <16 x i8> poison, i8 [[TMP6]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT6]], <16 x i8> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <16 x i8> [[BROADCAST_SPLAT7]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i1> [[TMP7]], splat (i1 true)
+; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[TMP8]], <16 x i1> [[TMP1]], <16 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = trunc <16 x i32> [[TMP5]] to <16 x i8>
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i1> [[TMP9]], [[TMP7]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP9]], <16 x i8> [[TMP10]], <16 x i8> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i1> [[TMP11]], i32 0
+; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK: [[PRED_STORE_IF]]:
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 0
+; CHECK-NEXT: store i8 [[TMP13]], ptr [[DST]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
+; CHECK: [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i1> [[TMP11]], i32 1
+; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]]
+; CHECK: [[PRED_STORE_IF8]]:
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 1
+; CHECK-NEXT: store i8 [[TMP15]], ptr [[DST]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]]
+; CHECK: [[PRED_STORE_CONTINUE9]]:
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i1> [[TMP11]], i32 2
+; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11:.*]]
+; CHECK: [[PRED_STORE_IF10]]:
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 2
+; CHECK-NEXT: store i8 [[TMP17]], ptr [[DST]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE11]]
+; CHECK: [[PRED_STORE_CONTINUE11]]:
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i1> [[TMP11]], i32 3
+; CHECK-NEXT: br i1 [[TMP18]], label %[[PRED_STORE_IF12:.*]], label %[[PRED_STORE_CONTINUE13:.*]]
+; CHECK: [[PRED_STORE_IF12]]:
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 3
+; CHECK-NEXT: store i8 [[TMP19]], ptr [[DST]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE13]]
+; CHECK: [[PRED_STORE_CONTINUE13]]:
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i1> [[TMP11]], i32 4
+; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]]
+; CHECK: [[PRED_STORE_IF14]]:
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 4
+; CHECK-NEXT: store i8 [[TMP21]], ptr [[DST]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE15]]
+; CHECK: [[PRED_STORE_CONTINUE15]]:
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i1> [[TMP11]], i32 5
+; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17:.*]]
+; CHECK: [[PRED_STORE_IF16]]:
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 5
+; CHECK-NEXT: store i8 [[TMP23]], ptr [[DST]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE17]]
+; CHECK: [[PRED_STORE_CONTINUE17]]:
+; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i1> [[TMP11]], i32 6
+; CHECK-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]]
+; CHECK: [[PRED_STORE_IF18]]:
+; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 6
+; CHECK-NEXT: store i8 [[TMP25]], ptr [[DST]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE19]]
+; CHECK: [[PRED_STORE_CONTINUE19]]:
+; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i1> [[TMP11]], i32 7
+; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_STORE_IF20:.*]], label %[[PRED_STORE_CONTINUE21:.*]]
+; CHECK: [[PRED_STORE_IF20]]:
+; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 7
+; CHECK-NEXT: store i8 [[TMP27]], ptr [[DST]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE21]]
+; CHECK: [[PRED_STORE_CONTINUE21]]:
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i1> [[TMP11]], i32 8
+; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF22:.*]], label %[[PRED_STORE_CONTINUE23:.*]]
+; CHECK: [[PRED_STORE_IF22]]:
+; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 8
+; CHECK-NEXT: store i8 [[TMP29]], ptr [[DST]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE23]]
+; CHECK: [[PRED_STORE_CONTINUE23]]:
+; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i1> [[TMP11]], i32 9
+; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF24:.*]], label %[[PRED_STORE_CONTINUE25:.*]]
+; CHECK: [[PRED_STORE_IF24]]:
+; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 9
+; CHECK-NEXT: store i8 [[TMP31]], ptr [[DST]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE25]]
+; CHECK: [[PRED_STORE_CONTINUE25]]:
+; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i1> [[TMP11]], i32 10
+; CHECK-NEXT: br i1 [[TMP32]], label %[[PRED_STORE_IF26:.*]], label %[[PRED_STORE_CONTINUE27:.*]]
+; CHECK: [[PRED_STORE_IF26]]:
+; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 10
+; CHECK-NEXT: store i8 [[TMP33]], ptr [[DST]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE27]]
+; CHECK: [[PRED_STORE_CONTINUE27]]:
+; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i1> [[TMP11]], i32 11
+; CHECK-NEXT: br i1 [[TMP34]], label %[[PRED_STORE_IF28:.*]], label %[[PRED_STORE_CONTINUE29:.*]]
+; CHECK: [[PRED_STORE_IF28]]:
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 11
+; CHECK-NEXT: store i8 [[TMP35]], ptr [[DST]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE29]]
+; CHECK: [[PRED_STORE_CONTINUE29]]:
+; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i1> [[TMP11]], i32 12
+; CHECK-NEXT: br i1 [[TMP36]], label %[[PRED_STORE_IF30:.*]], label %[[PRED_STORE_CONTINUE31:.*]]
+; CHECK: [[PRED_STORE_IF30]]:
+; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 12
+; CHECK-NEXT: store i8 [[TMP37]], ptr [[DST]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE31]]
+; CHECK: [[PRED_STORE_CONTINUE31]]:
+; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i1> [[TMP11]], i32 13
+; CHECK-NEXT: br i1 [[TMP38]], label %[[PRED_STORE_IF32:.*]], label %[[PRED_STORE_CONTINUE33:.*]]
+; CHECK: [[PRED_STORE_IF32]]:
+; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 13
+; CHECK-NEXT: store i8 [[TMP39]], ptr [[DST]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE33]]
+; CHECK: [[PRED_STORE_CONTINUE33]]:
+; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i1> [[TMP11]], i32 14
+; CHECK-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF34:.*]], label %[[PRED_STORE_CONTINUE35:.*]]
+; CHECK: [[PRED_STORE_IF34]]:
+; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 14
+; CHECK-NEXT: store i8 [[TMP41]], ptr [[DST]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE35]]
+; CHECK: [[PRED_STORE_CONTINUE35]]:
+; CHECK-NEXT: [[TMP42:%.*]] = extractelement <16 x i1> [[TMP11]], i32 15
+; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF36:.*]], label %[[PRED_STORE_CONTINUE37]]
+; CHECK: [[PRED_STORE_IF36]]:
+; CHECK-NEXT: [[TMP43:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 15
+; CHECK-NEXT: store i8 [[TMP43]], ptr [[DST]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE37]]
+; CHECK: [[PRED_STORE_CONTINUE37]]:
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP44]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
+; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
+; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[N_VEC]]
+; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]]
+; CHECK: [[VEC_EPILOG_PH]]:
+; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT: [[N_MOD_VF38:%.*]] = urem i64 [[TMP0]], 4
+; CHECK-NEXT: [[N_VEC39:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF38]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT40:%.*]] = insertelement <4 x i1> poison, i1 [[C_2]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT41:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT40]], <4 x i1> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP45:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT41]], splat (i1 true)
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT45:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT46:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT45]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
+; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX42:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT58:%.*]], %[[PRED_STORE_CONTINUE57:.*]] ]
+; CHECK-NEXT: [[TMP46:%.*]] = load i8, ptr [[SRC_1]], align 1
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT43:%.*]] = insertelement <4 x i8> poison, i8 [[TMP46]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT44:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT43]], <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP47:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT44]] to <4 x i32>
+; CHECK-NEXT: [[TMP48:%.*]] = mul <4 x i32> [[BROADCAST_SPLAT46]], [[TMP47]]
+; CHECK-NEXT: [[TMP49:%.*]] = sdiv <4 x i32> [[TMP48]], splat (i32 255)
+; CHECK-NEXT: [[TMP50:%.*]] = load i8, ptr [[SRC_2]], align 1
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT47:%.*]] = insertelement <4 x i8> poison, i8 [[TMP50]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT48:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT47]], <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP51:%.*]] = icmp eq <4 x i8> [[BROADCAST_SPLAT48]], zeroinitializer
+; CHECK-NEXT: [[TMP52:%.*]] = xor <4 x i1> [[TMP51]], splat (i1 true)
+; CHECK-NEXT: [[TMP53:%.*]] = select <4 x i1> [[TMP52]], <4 x i1> [[TMP45]], <4 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP54:%.*]] = trunc <4 x i32> [[TMP49]] to <4 x i8>
+; CHECK-NEXT: [[TMP55:%.*]] = or <4 x i1> [[TMP53]], [[TMP51]]
+; CHECK-NEXT: [[PREDPHI49:%.*]] = select <4 x i1> [[TMP53]], <4 x i8> [[TMP54]], <4 x i8> zeroinitializer
+; CHECK-NEXT: [[TMP56:%.*]] = extractelement <4 x i1> [[TMP55]], i32 0
+; CHECK-NEXT: br i1 [[TMP56]], label %[[PRED_STORE_IF50:.*]], label %[[PRED_STORE_CONTINUE51:.*]]
+; CHECK: [[PRED_STORE_IF50]]:
+; CHECK-NEXT: [[TMP57:%.*]] = extractelement <4 x i8> [[PREDPHI49]], i32 0
+; CHECK-NEXT: store i8 [[TMP57]], ptr [[DST]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE51]]
+; CHECK: [[PRED_STORE_CONTINUE51]]:
+; CHECK-NEXT: [[TMP58:%.*]] = extractelement <4 x i1> [[TMP55]], i32 1
+; CHECK-NEXT: br i1 [[TMP58]], label %[[PRED_STORE_IF52:.*]], label %[[PRED_STORE_CONTINUE53:.*]]
+; CHECK: [[PRED_STORE_IF52]]:
+; CHECK-NEXT: [[TMP59:%.*]] = extractelement <4 x i8> [[PREDPHI49]], i32 1
+; CHECK-NEXT: store i8 [[TMP59]], ptr [[DST]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE53]]
+; CHECK: [[PRED_STORE_CONTINUE53]]:
+; CHECK-NEXT: [[TMP60:%.*]] = extractelement <4 x i1> [[TMP55]], i32 2
+; CHECK-NEXT: br i1 [[TMP60]], label %[[PRED_STORE_IF54:.*]], label %[[PRED_STORE_CONTINUE55:.*]]
+; CHECK: [[PRED_STORE_IF54]]:
+; CHECK-NEXT: [[TMP61:%.*]] = extractelement <4 x i8> [[PREDPHI49]], i32 2
+; CHECK-NEXT: store i8 [[TMP61]], ptr [[DST]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE55]]
+; CHECK: [[PRED_STORE_CONTINUE55]]:
+; CHECK-NEXT: [[TMP62:%.*]] = extractelement <4 x i1> [[TMP55]], i32 3
+; CHECK-NEXT: br i1 [[TMP62]], label %[[PRED_STORE_IF56:.*]], label %[[PRED_STORE_CONTINUE57]]
+; CHECK: [[PRED_STORE_IF56]]:
+; CHECK-NEXT: [[TMP63:%.*]] = extractelement <4 x i8> [[PREDPHI49]], i32 3
+; CHECK-NEXT: store i8 [[TMP63]], ptr [[DST]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE57]]
+; CHECK: [[PRED_STORE_CONTINUE57]]:
+; CHECK-NEXT: [[INDEX_NEXT58]] = add nuw i64 [[INDEX42]], 4
+; CHECK-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT58]], [[N_VEC39]]
+; CHECK-NEXT: br i1 [[TMP64]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N59:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC39]]
+; CHECK-NEXT: br i1 [[CMP_N59]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
+; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC39]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, %[[ITER_CHECK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[L_1:%.*]] = load i8, ptr [[SRC_1]], align 1
-; CHECK-NEXT: [[EXT:%.*]] = zext i8 [[L_1]] to i32
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ]
+; CHECK-NEXT: [[L_3:%.*]] = load i8, ptr [[SRC_1]], align 1
+; CHECK-NEXT: [[EXT:%.*]] = zext i8 [[L_3]] to i32
; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[X]], [[EXT]]
; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[MUL]], 255
; CHECK-NEXT: [[L_2:%.*]] = load i8, ptr [[SRC_2]], align 1
@@ -447,7 +664,7 @@ define void @test_blend_feeding_replicated_store_3(ptr noalias %src.1, ptr noali
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
@@ -491,4 +708,7 @@ exit:
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
+; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
+; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]}
+; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META2]], [[META1]]}
;.
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll
index 57c877a58d5c0e..05584eaa954de4 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll
@@ -433,26 +433,10 @@ define <8 x i16> @slp_v8i16_Op1_Op2_unknown(<8 x i16> %a, <8 x i16> %b)
}
define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const(<4 x i32> %a)
-; NO-SVE-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const(
-; NO-SVE-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
-; NO-SVE-NEXT: [[A0:%.*]] = extractelement <4 x i32> [[A]], i32 0
-; NO-SVE-NEXT: [[A1:%.*]] = extractelement <4 x i32> [[A]], i32 1
-; NO-SVE-NEXT: [[A2:%.*]] = extractelement <4 x i32> [[A]], i32 2
-; NO-SVE-NEXT: [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3
-; NO-SVE-NEXT: [[TMP1:%.*]] = sdiv i32 [[A0]], 1
-; NO-SVE-NEXT: [[TMP2:%.*]] = sdiv i32 [[A1]], 3
-; NO-SVE-NEXT: [[TMP3:%.*]] = sdiv i32 [[A2]], 5
-; NO-SVE-NEXT: [[TMP4:%.*]] = sdiv i32 [[A3]], 7
-; NO-SVE-NEXT: [[R0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
-; NO-SVE-NEXT: [[R1:%.*]] = insertelement <4 x i32> [[R0]], i32 [[TMP2]], i32 1
-; NO-SVE-NEXT: [[R2:%.*]] = insertelement <4 x i32> [[R1]], i32 [[TMP3]], i32 2
-; NO-SVE-NEXT: [[R3:%.*]] = insertelement <4 x i32> [[R2]], i32 [[TMP4]], i32 3
-; NO-SVE-NEXT: ret <4 x i32> [[R3]]
-;
-; SVE-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const(
-; SVE-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
-; SVE-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> [[A]], <i32 1, i32 3, i32 5, i32 7>
-; SVE-NEXT: ret <4 x i32> [[TMP1]]
+; CHECK-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const(
+; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> [[A]], <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
;
{
%a0 = extractelement <4 x i32> %a, i32 0
@@ -472,7 +456,7 @@ define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const(<4 x i32> %a)
define <4 x i32> @slp_v4i32_Op1_unknown_Op2_uniform_const(<4 x i32> %a)
; CHECK-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_uniform_const(
-; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> [[A]], splat (i32 5)
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
;
@@ -515,26 +499,10 @@ define <4 x i32> @slp_v4i32_Op1_unknown_Op2_uniform_const_pow2(<4 x i32> %a)
}
define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const_pow2(<4 x i32> %a)
-; NO-SVE-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const_pow2(
-; NO-SVE-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
-; NO-SVE-NEXT: [[A0:%.*]] = extractelement <4 x i32> [[A]], i32 0
-; NO-SVE-NEXT: [[A1:%.*]] = extractelement <4 x i32> [[A]], i32 1
-; NO-SVE-NEXT: [[A2:%.*]] = extractelement <4 x i32> [[A]], i32 2
-; NO-SVE-NEXT: [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3
-; NO-SVE-NEXT: [[TMP1:%.*]] = sdiv i32 [[A0]], 1
-; NO-SVE-NEXT: [[TMP2:%.*]] = sdiv i32 [[A1]], 2
-; NO-SVE-NEXT: [[TMP3:%.*]] = sdiv i32 [[A2]], 4
-; NO-SVE-NEXT: [[TMP4:%.*]] = sdiv i32 [[A3]], 8
-; NO-SVE-NEXT: [[R0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
-; NO-SVE-NEXT: [[R1:%.*]] = insertelement <4 x i32> [[R0]], i32 [[TMP2]], i32 1
-; NO-SVE-NEXT: [[R2:%.*]] = insertelement <4 x i32> [[R1]], i32 [[TMP3]], i32 2
-; NO-SVE-NEXT: [[R3:%.*]] = insertelement <4 x i32> [[R2]], i32 [[TMP4]], i32 3
-; NO-SVE-NEXT: ret <4 x i32> [[R3]]
-;
-; SVE-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const_pow2(
-; SVE-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
-; SVE-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> [[A]], <i32 1, i32 2, i32 4, i32 8>
-; SVE-NEXT: ret <4 x i32> [[TMP1]]
+; CHECK-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const_pow2(
+; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> [[A]], <i32 1, i32 2, i32 4, i32 8>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
;
{
%a0 = extractelement <4 x i32> %a, i32 0
@@ -554,35 +522,13 @@ define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const_pow2(<4 x i32> %a)
; computes (a/const + x - y) * z
define <2 x i32> @vectorize_sdiv_v2i32(<2 x i32> %a, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z)
-; NO-SVE-LABEL: define <2 x i32> @vectorize_sdiv_v2i32(
-; NO-SVE-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
-; NO-SVE-NEXT: [[A0:%.*]] = extractelement <2 x i32> [[A]], i64 0
-; NO-SVE-NEXT: [[A1:%.*]] = extractelement <2 x i32> [[A]], i64 1
-; NO-SVE-NEXT: [[TMP1:%.*]] = sdiv i32 [[A0]], 2
-; NO-SVE-NEXT: [[TMP2:%.*]] = sdiv i32 [[A1]], 4
-; NO-SVE-NEXT: [[X0:%.*]] = extractelement <2 x i32> [[X]], i64 0
-; NO-SVE-NEXT: [[X1:%.*]] = extractelement <2 x i32> [[X]], i64 1
-; NO-SVE-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[X0]]
-; NO-SVE-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], [[X1]]
-; NO-SVE-NEXT: [[Y0:%.*]] = extractelement <2 x i32> [[Y]], i64 0
-; NO-SVE-NEXT: [[Y1:%.*]] = extractelement <2 x i32> [[Y]], i64 1
-; NO-SVE-NEXT: [[TMP5:%.*]] = sub i32 [[TMP3]], [[Y0]]
-; NO-SVE-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[Y1]]
-; NO-SVE-NEXT: [[Z0:%.*]] = extractelement <2 x i32> [[Z]], i64 0
-; NO-SVE-NEXT: [[Z1:%.*]] = extractelement <2 x i32> [[Z]], i64 1
-; NO-SVE-NEXT: [[TMP7:%.*]] = mul i32 [[TMP5]], [[Z0]]
-; NO-SVE-NEXT: [[TMP8:%.*]] = mul i32 [[TMP6]], [[Z1]]
-; NO-SVE-NEXT: [[RES0:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
-; NO-SVE-NEXT: [[RES1:%.*]] = insertelement <2 x i32> [[RES0]], i32 [[TMP8]], i32 1
-; NO-SVE-NEXT: ret <2 x i32> [[RES1]]
-;
-; SVE-LABEL: define <2 x i32> @vectorize_sdiv_v2i32(
-; SVE-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
-; SVE-NEXT: [[TMP1:%.*]] = sdiv <2 x i32> [[A]], <i32 2, i32 4>
-; SVE-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], [[X]]
-; SVE-NEXT: [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], [[Y]]
-; SVE-NEXT: [[TMP4:%.*]] = mul <2 x i32> [[TMP3]], [[Z]]
-; SVE-NEXT: ret <2 x i32> [[TMP4]]
+; CHECK-LABEL: define <2 x i32> @vectorize_sdiv_v2i32(
+; CHECK-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = sdiv <2 x i32> [[A]], <i32 2, i32 4>
+; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], [[X]]
+; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], [[Y]]
+; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i32> [[TMP3]], [[Z]]
+; CHECK-NEXT: ret <2 x i32> [[TMP4]]
;
{
%a0 = extractelement <2 x i32> %a, i64 0
More information about the llvm-commits
mailing list