[llvm] c480874 - [AArch64][CostModel] Alter sdiv/srem cost where the divisor is constant (#123552)

via llvm-commits llvm-commits at lists.llvm.org
Sun Mar 9 22:26:42 PDT 2025


Author: Sushant Gokhale
Date: 2025-03-09T22:26:39-07:00
New Revision: c4808741e89df29dcd06c7be2784824c82f34e46

URL: https://github.com/llvm/llvm-project/commit/c4808741e89df29dcd06c7be2784824c82f34e46
DIFF: https://github.com/llvm/llvm-project/commit/c4808741e89df29dcd06c7be2784824c82f34e46.diff

LOG: [AArch64][CostModel] Alter sdiv/srem cost where the divisor is constant (#123552)

This patch revises the cost model for sdiv/srem and draws its inspiration from the udiv/urem patch #122236

The typical codegen for the different scenarios has been mentioned as notes/comments in the code itself( this is done owing to lot of scenarios such that it would be difficult to mention them here in the patch description).

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/AArch64/div.ll
    llvm/test/Analysis/CostModel/AArch64/div_cte.ll
    llvm/test/Analysis/CostModel/AArch64/rem.ll
    llvm/test/Analysis/CostModel/AArch64/sve-div.ll
    llvm/test/Analysis/CostModel/AArch64/sve-rem.ll
    llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/div.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 53c6a029e9287..7cec8a17dfaaa 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -18,6 +18,7 @@
 #include "llvm/CodeGen/BasicTTIImpl.h"
 #include "llvm/CodeGen/CostTable.h"
 #include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsAArch64.h"
@@ -3531,23 +3532,111 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
   default:
     return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                          Op2Info);
+  case ISD::SREM:
   case ISD::SDIV:
-    if (Op2Info.isConstant() && Op2Info.isUniform() && Op2Info.isPowerOf2()) {
-      // On AArch64, scalar signed division by constants power-of-two are
-      // normally expanded to the sequence ADD + CMP + SELECT + SRA.
-      // The OperandValue properties many not be same as that of previous
-      // operation; conservatively assume OP_None.
-      InstructionCost Cost = getArithmeticInstrCost(
-          Instruction::Add, Ty, CostKind,
-          Op1Info.getNoProps(), Op2Info.getNoProps());
-      Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind,
-                                     Op1Info.getNoProps(), Op2Info.getNoProps());
-      Cost += getArithmeticInstrCost(
-          Instruction::Select, Ty, CostKind,
-          Op1Info.getNoProps(), Op2Info.getNoProps());
-      Cost += getArithmeticInstrCost(Instruction::AShr, Ty, CostKind,
-                                     Op1Info.getNoProps(), Op2Info.getNoProps());
-      return Cost;
+    /*
+    Notes for sdiv/srem specific costs:
+    1. This only considers the cases where the divisor is constant, uniform and
+    (pow-of-2/non-pow-of-2). Other cases are not important since they either
+    result in some form of (ldr + adrp), corresponding to constant vectors, or
+    scalarization of the division operation.
+    2. Constant divisors, either negative in whole or partially, don't result in
+    significantly 
diff erent codegen as compared to positive constant divisors.
+    So, we don't consider negative divisors seperately.
+    3. If the codegen is significantly 
diff erent with SVE, it has been indicated
+    using comments at appropriate places.
+
+    sdiv specific cases:
+    -----------------------------------------------------------------------
+    codegen                       | pow-of-2               | Type
+    -----------------------------------------------------------------------
+    add + cmp + csel + asr        | Y                      | i64
+    add + cmp + csel + asr        | Y                      | i32
+    -----------------------------------------------------------------------
+
+    srem specific cases:
+    -----------------------------------------------------------------------
+    codegen                       | pow-of-2               | Type
+    -----------------------------------------------------------------------
+    negs + and + and + csneg      | Y                      | i64
+    negs + and + and + csneg      | Y                      | i32
+    -----------------------------------------------------------------------
+
+    other sdiv/srem cases:
+    -------------------------------------------------------------------------
+    commom codegen            | + srem     | + sdiv     | pow-of-2  | Type
+    -------------------------------------------------------------------------
+    smulh + asr + add + add   | -          | -          | N         | i64
+    smull + lsr + add + add   | -          | -          | N         | i32
+    usra                      | and + sub  | sshr       | Y         | <2 x i64>
+    2 * (scalar code)         | -          | -          | N         | <2 x i64>
+    usra                      | bic + sub  | sshr + neg | Y         | <4 x i32>
+    smull2 + smull + uzp2     | mls        | -          | N         | <4 x i32>
+           + sshr  + usra     |            |            |           |
+    -------------------------------------------------------------------------
+    */
+    if (Op2Info.isConstant() && Op2Info.isUniform()) {
+      InstructionCost AddCost =
+          getArithmeticInstrCost(Instruction::Add, Ty, CostKind,
+                                 Op1Info.getNoProps(), Op2Info.getNoProps());
+      InstructionCost AsrCost =
+          getArithmeticInstrCost(Instruction::AShr, Ty, CostKind,
+                                 Op1Info.getNoProps(), Op2Info.getNoProps());
+      InstructionCost MulCost =
+          getArithmeticInstrCost(Instruction::Mul, Ty, CostKind,
+                                 Op1Info.getNoProps(), Op2Info.getNoProps());
+      // add/cmp/csel/csneg should have similar cost while asr/negs/and should
+      // have similar cost.
+      auto VT = TLI->getValueType(DL, Ty);
+      if (LT.second.isScalarInteger() && VT.getSizeInBits() <= 64) {
+        if (Op2Info.isPowerOf2()) {
+          return ISD == ISD::SDIV ? (3 * AddCost + AsrCost)
+                                  : (3 * AsrCost + AddCost);
+        } else {
+          return MulCost + AsrCost + 2 * AddCost;
+        }
+      } else if (VT.isVector()) {
+        InstructionCost UsraCost = 2 * AsrCost;
+        if (Op2Info.isPowerOf2()) {
+          // Division with scalable types corresponds to native 'asrd'
+          // instruction when SVE is available.
+          // e.g. %1 = sdiv <vscale x 4 x i32> %a, splat (i32 8)
+          if (Ty->isScalableTy() && ST->hasSVE())
+            return 2 * AsrCost;
+          return UsraCost +
+                 (ISD == ISD::SDIV
+                      ? (LT.second.getScalarType() == MVT::i64 ? 1 : 2) *
+                            AsrCost
+                      : 2 * AddCost);
+        } else if (LT.second == MVT::v2i64) {
+          return VT.getVectorNumElements() *
+                 getArithmeticInstrCost(Opcode, Ty->getScalarType(), CostKind,
+                                        Op1Info.getNoProps(),
+                                        Op2Info.getNoProps());
+        } else {
+          // When SVE is available, we get:
+          // smulh + lsr + add/sub + asr + add/sub.
+          if (Ty->isScalableTy() && ST->hasSVE())
+            return MulCost /*smulh cost*/ + 2 * AddCost + 2 * AsrCost;
+          return 2 * MulCost + AddCost /*uzp2 cost*/ + AsrCost + UsraCost;
+        }
+      }
+    }
+    if (Op2Info.isConstant() && !Op2Info.isUniform() &&
+        LT.second.isFixedLengthVector()) {
+      // FIXME: When the constant vector is non-uniform, this may result in
+      // loading the vector from constant pool or in some cases, may also result
+      // in scalarization. For now, we are approximating this with the
+      // scalarization cost.
+      auto ExtractCost = 2 * getVectorInstrCost(Instruction::ExtractElement, Ty,
+                                                CostKind, -1, nullptr, nullptr);
+      auto InsertCost = getVectorInstrCost(Instruction::InsertElement, Ty,
+                                           CostKind, -1, nullptr, nullptr);
+      unsigned NElts = cast<FixedVectorType>(Ty)->getNumElements();
+      return ExtractCost + InsertCost +
+             NElts * getArithmeticInstrCost(Opcode, Ty->getScalarType(),
+                                            CostKind, Op1Info.getNoProps(),
+                                            Op2Info.getNoProps());
     }
     [[fallthrough]];
   case ISD::UDIV:
@@ -3587,23 +3676,6 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
                                   AddCost * 2 + ShrCost;
         return DivCost + (ISD == ISD::UREM ? MulCost + AddCost : 0);
       }
-
-      // TODO: Fix SDIV and SREM costs, similar to the above.
-      if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT) &&
-          Op2Info.isUniform() && !VT.isScalableVector()) {
-        // Vector signed division by constant are expanded to the
-        // sequence MULHS + ADD/SUB + SRA + SRL + ADD.
-        InstructionCost MulCost =
-            getArithmeticInstrCost(Instruction::Mul, Ty, CostKind,
-                                   Op1Info.getNoProps(), Op2Info.getNoProps());
-        InstructionCost AddCost =
-            getArithmeticInstrCost(Instruction::Add, Ty, CostKind,
-                                   Op1Info.getNoProps(), Op2Info.getNoProps());
-        InstructionCost ShrCost =
-            getArithmeticInstrCost(Instruction::AShr, Ty, CostKind,
-                                   Op1Info.getNoProps(), Op2Info.getNoProps());
-        return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
-      }
     }
 
     // div i128's are lowered as libcalls.  Pass nullptr as (u)divti3 calls are

diff  --git a/llvm/test/Analysis/CostModel/AArch64/div.ll b/llvm/test/Analysis/CostModel/AArch64/div.ll
index fba6d9ab31793..5f01710a1e60f 100644
--- a/llvm/test/Analysis/CostModel/AArch64/div.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/div.ll
@@ -288,28 +288,28 @@ define void @udiv_uniform() {
 define void @sdiv_const() {
 ; CHECK-LABEL: 'sdiv_const'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = sdiv i128 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = sdiv i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 6, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V2i32 = sdiv <2 x i32> undef, <i32 4, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V2i16 = sdiv <2 x i16> undef, <i16 4, i16 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i16 = sdiv <4 x i16> undef, <i16 4, i16 5, i16 6, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 176 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 352 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V2i8 = sdiv <2 x i8> undef, <i8 4, i8 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i8 = sdiv <4 x i8> undef, <i8 4, i8 5, i8 6, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i8 = sdiv <8 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 168 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 336 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 672 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 6, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i32 = sdiv <2 x i32> undef, <i32 4, i32 5>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i16 = sdiv <2 x i16> undef, <i16 4, i16 5>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4i16 = sdiv <4 x i16> undef, <i16 4, i16 5, i16 6, i16 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i8 = sdiv <2 x i8> undef, <i8 4, i8 5>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4i8 = sdiv <4 x i8> undef, <i8 4, i8 5, i8 6, i8 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i8 = sdiv <8 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I128 = sdiv i128 undef, 7
@@ -405,28 +405,28 @@ define void @udiv_const() {
 define void @sdiv_uniformconst() {
 ; CHECK-LABEL: 'sdiv_uniformconst'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = sdiv i128 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = sdiv i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i16 = sdiv <2 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i16 = sdiv <4 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i8 = sdiv <2 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i16 = sdiv <2 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i16 = sdiv <4 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i8 = sdiv <2 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I128 = sdiv i128 undef, 7
@@ -521,28 +521,28 @@ define void @udiv_uniformconst() {
 define void @sdiv_constpow2() {
 ; CHECK-LABEL: 'sdiv_constpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = sdiv i128 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I64 = sdiv i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 8, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I32 = sdiv i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V2i32 = sdiv <2 x i32> undef, <i32 2, i32 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = sdiv i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V2i16 = sdiv <2 x i16> undef, <i16 2, i16 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i16 = sdiv <4 x i16> undef, <i16 2, i16 4, i16 8, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 176 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 352 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = sdiv i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V2i8 = sdiv <2 x i8> undef, <i8 2, i8 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i8 = sdiv <4 x i8> undef, <i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i8 = sdiv <8 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 168 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 336 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 672 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 8, i64 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i32 = sdiv <2 x i32> undef, <i32 2, i32 4>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i16 = sdiv <2 x i16> undef, <i16 2, i16 4>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4i16 = sdiv <4 x i16> undef, <i16 2, i16 4, i16 8, i16 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i8 = sdiv <2 x i8> undef, <i8 2, i8 4>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4i8 = sdiv <4 x i8> undef, <i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i8 = sdiv <8 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I128 = sdiv i128 undef, 16
@@ -637,28 +637,28 @@ define void @udiv_constpow2() {
 define void @sdiv_uniformconstpow2() {
 ; CHECK-LABEL: 'sdiv_uniformconstpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = sdiv i128 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I64 = sdiv i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I32 = sdiv i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = sdiv i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i16 = sdiv <2 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V4i16 = sdiv <4 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 204 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = sdiv i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i8 = sdiv <2 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 396 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i16 = sdiv <2 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i16 = sdiv <4 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i8 = sdiv <2 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I128 = sdiv i128 undef, 16
@@ -753,28 +753,28 @@ define void @udiv_uniformconstpow2() {
 define void @sdiv_constnegpow2() {
 ; CHECK-LABEL: 'sdiv_constnegpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = sdiv i128 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = sdiv i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 -8, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V2i32 = sdiv <2 x i32> undef, <i32 -2, i32 -4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V2i16 = sdiv <2 x i16> undef, <i16 -2, i16 -4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i16 = sdiv <4 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 176 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 352 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V2i8 = sdiv <2 x i8> undef, <i8 -2, i8 -4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i8 = sdiv <4 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i8 = sdiv <8 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 168 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 336 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 672 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 -8, i64 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i32 = sdiv <2 x i32> undef, <i32 -2, i32 -4>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i16 = sdiv <2 x i16> undef, <i16 -2, i16 -4>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4i16 = sdiv <4 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i8 = sdiv <2 x i8> undef, <i8 -2, i8 -4>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4i8 = sdiv <4 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i8 = sdiv <8 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I128 = sdiv i128 undef, -16
@@ -869,28 +869,28 @@ define void @udiv_constnegpow2() {
 define void @sdiv_uniformconstnegpow2() {
 ; CHECK-LABEL: 'sdiv_uniformconstnegpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = sdiv i128 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = sdiv i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i16 = sdiv <2 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i16 = sdiv <4 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i8 = sdiv <2 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i16 = sdiv <2 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i16 = sdiv <4 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i8 = sdiv <2 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I128 = sdiv i128 undef, -16

diff  --git a/llvm/test/Analysis/CostModel/AArch64/div_cte.ll b/llvm/test/Analysis/CostModel/AArch64/div_cte.ll
index 68e287477042d..e92d827db02e2 100644
--- a/llvm/test/Analysis/CostModel/AArch64/div_cte.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/div_cte.ll
@@ -7,7 +7,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define <16 x i8> @sdiv8xi16(<16 x i8> %x) {
 ; CHECK-LABEL: 'sdiv8xi16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <16 x i8> %x, splat (i8 9)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <16 x i8> %x, splat (i8 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %div
 ;
   %div = sdiv <16 x i8> %x, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
@@ -16,7 +16,7 @@ define <16 x i8> @sdiv8xi16(<16 x i8> %x) {
 
 define <8 x i16> @sdiv16xi8(<8 x i16> %x) {
 ; CHECK-LABEL: 'sdiv16xi8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <8 x i16> %x, splat (i16 9)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <8 x i16> %x, splat (i16 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %div
 ;
   %div = sdiv <8 x i16> %x, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
@@ -25,7 +25,7 @@ define <8 x i16> @sdiv16xi8(<8 x i16> %x) {
 
 define <4 x i32> @sdiv32xi4(<4 x i32> %x) {
 ; CHECK-LABEL: 'sdiv32xi4'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <4 x i32> %x, splat (i32 9)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <4 x i32> %x, splat (i32 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %div
 ;
   %div = sdiv <4 x i32> %x, <i32 9, i32 9, i32 9, i32 9>

diff  --git a/llvm/test/Analysis/CostModel/AArch64/rem.ll b/llvm/test/Analysis/CostModel/AArch64/rem.ll
index f17533b713611..de38ba0507f62 100644
--- a/llvm/test/Analysis/CostModel/AArch64/rem.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/rem.ll
@@ -5,7 +5,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define void @srem() {
 ; CHECK-LABEL: 'srem'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %I128 = srem i128 undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = srem i128 undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i64 = srem <2 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i64 = srem <4 x i64> undef, undef
@@ -287,29 +287,29 @@ define void @urem_uniform() {
 
 define void @srem_const() {
 ; CHECK-LABEL: 'srem_const'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %I128 = srem i128 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = srem i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i64 = srem <2 x i64> undef, <i64 6, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i64 = srem <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i64 = srem <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i32 = srem <2 x i32> undef, <i32 4, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i16 = srem <2 x i16> undef, <i16 4, i16 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i16 = srem <4 x i16> undef, <i16 4, i16 5, i16 6, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i8 = srem <2 x i8> undef, <i8 4, i8 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i8 = srem <4 x i8> undef, <i8 4, i8 5, i8 6, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i8 = srem <8 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = srem <16 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = srem <32 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = srem <64 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = srem i128 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2i64 = srem <2 x i64> undef, <i64 6, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4i64 = srem <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8i64 = srem <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2i32 = srem <2 x i32> undef, <i32 4, i32 5>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4i32 = srem <4 x i32> undef, <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = srem <8 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V16i32 = srem <16 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2i16 = srem <2 x i16> undef, <i16 4, i16 5>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4i16 = srem <4 x i16> undef, <i16 4, i16 5, i16 6, i16 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8i16 = srem <8 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V16i16 = srem <16 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %V32i16 = srem <32 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2i8 = srem <2 x i8> undef, <i8 4, i8 5>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4i8 = srem <4 x i8> undef, <i8 4, i8 5, i8 6, i8 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8i8 = srem <8 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V16i8 = srem <16 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %V32i8 = srem <32 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %V64i8 = srem <64 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I128 = srem i128 undef, 7
@@ -404,29 +404,29 @@ define void @urem_const() {
 
 define void @srem_uniformconst() {
 ; CHECK-LABEL: 'srem_uniformconst'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %I128 = srem i128 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = srem i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i32 = srem <2 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i16 = srem <2 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i16 = srem <4 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i8 = srem <2 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i8 = srem <4 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i8 = srem <8 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = srem i128 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i32 = srem <2 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i16 = srem <2 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i16 = srem <4 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i8 = srem <2 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i8 = srem <4 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i8 = srem <8 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I128 = srem i128 undef, 7
@@ -520,29 +520,29 @@ define void @urem_uniformconst() {
 
 define void @srem_constpow2() {
 ; CHECK-LABEL: 'srem_constpow2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %I128 = srem i128 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i64 = srem <2 x i64> undef, <i64 8, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i64 = srem <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i64 = srem <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I32 = srem i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i32 = srem <2 x i32> undef, <i32 2, i32 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i16 = srem <2 x i16> undef, <i16 2, i16 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i16 = srem <4 x i16> undef, <i16 2, i16 4, i16 8, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I8 = srem i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i8 = srem <2 x i8> undef, <i8 2, i8 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i8 = srem <4 x i8> undef, <i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i8 = srem <8 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = srem <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = srem <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = srem <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = srem i128 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2i64 = srem <2 x i64> undef, <i64 8, i64 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4i64 = srem <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8i64 = srem <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2i32 = srem <2 x i32> undef, <i32 2, i32 4>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4i32 = srem <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = srem <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V16i32 = srem <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2i16 = srem <2 x i16> undef, <i16 2, i16 4>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4i16 = srem <4 x i16> undef, <i16 2, i16 4, i16 8, i16 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8i16 = srem <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V16i16 = srem <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %V32i16 = srem <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2i8 = srem <2 x i8> undef, <i8 2, i8 4>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4i8 = srem <4 x i8> undef, <i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8i8 = srem <8 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V16i8 = srem <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %V32i8 = srem <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %V64i8 = srem <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I128 = srem i128 undef, 16
@@ -636,29 +636,29 @@ define void @urem_constpow2() {
 
 define void @srem_uniformconstpow2() {
 ; CHECK-LABEL: 'srem_uniformconstpow2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %I128 = srem i128 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I32 = srem i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2i32 = srem <2 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 176 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2i16 = srem <2 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4i16 = srem <4 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 176 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 352 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I8 = srem i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2i8 = srem <2 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4i8 = srem <4 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i8 = srem <8 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 176 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 352 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 704 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = srem i128 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i32 = srem <2 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i16 = srem <2 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i16 = srem <4 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i8 = srem <2 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i8 = srem <4 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i8 = srem <8 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I128 = srem i128 undef, 16
@@ -752,29 +752,29 @@ define void @urem_uniformconstpow2() {
 
 define void @srem_constnegpow2() {
 ; CHECK-LABEL: 'srem_constnegpow2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %I128 = srem i128 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = srem i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i64 = srem <2 x i64> undef, <i64 -8, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i64 = srem <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i64 = srem <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i32 = srem <2 x i32> undef, <i32 -2, i32 -4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i16 = srem <2 x i16> undef, <i16 -2, i16 -4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i16 = srem <4 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i8 = srem <2 x i8> undef, <i8 -2, i8 -4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i8 = srem <4 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i8 = srem <8 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = srem <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = srem <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = srem <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = srem i128 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2i64 = srem <2 x i64> undef, <i64 -8, i64 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4i64 = srem <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8i64 = srem <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2i32 = srem <2 x i32> undef, <i32 -2, i32 -4>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4i32 = srem <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = srem <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V16i32 = srem <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2i16 = srem <2 x i16> undef, <i16 -2, i16 -4>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4i16 = srem <4 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8i16 = srem <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V16i16 = srem <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %V32i16 = srem <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2i8 = srem <2 x i8> undef, <i8 -2, i8 -4>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4i8 = srem <4 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8i8 = srem <8 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V16i8 = srem <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %V32i8 = srem <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %V64i8 = srem <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I128 = srem i128 undef, -16
@@ -868,29 +868,29 @@ define void @urem_constnegpow2() {
 
 define void @srem_uniformconstnegpow2() {
 ; CHECK-LABEL: 'srem_uniformconstnegpow2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %I128 = srem i128 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = srem i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i32 = srem <2 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i16 = srem <2 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i16 = srem <4 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i8 = srem <2 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i8 = srem <4 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i8 = srem <8 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = srem i128 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i32 = srem <2 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i16 = srem <2 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i16 = srem <4 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i8 = srem <2 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i8 = srem <4 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i8 = srem <8 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I128 = srem i128 undef, -16

diff  --git a/llvm/test/Analysis/CostModel/AArch64/sve-div.ll b/llvm/test/Analysis/CostModel/AArch64/sve-div.ll
index d20ebbf0ea9c1..c38cdcd8b9a34 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-div.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-div.ll
@@ -163,42 +163,42 @@ define void @udiv() {
 
 define void @sdiv_uniformconst() {
 ; CHECK-LABEL: 'sdiv_uniformconst'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i16 = sdiv <2 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i16 = sdiv <4 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i8 = sdiv <2 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i64 = sdiv <vscale x 2 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i64 = sdiv <vscale x 4 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i64 = sdiv <vscale x 8 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i32 = sdiv <vscale x 2 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i32 = sdiv <vscale x 4 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV8i32 = sdiv <vscale x 8 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV16i32 = sdiv <vscale x 16 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i16 = sdiv <vscale x 2 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i16 = sdiv <vscale x 4 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i16 = sdiv <vscale x 8 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV16i16 = sdiv <vscale x 16 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %NV32i16 = sdiv <vscale x 32 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i8 = sdiv <vscale x 2 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i8 = sdiv <vscale x 4 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i8 = sdiv <vscale x 8 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV16i8 = sdiv <vscale x 16 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %NV32i8 = sdiv <vscale x 32 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %NV64i8 = sdiv <vscale x 64 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i16 = sdiv <2 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i16 = sdiv <4 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i8 = sdiv <2 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV2i64 = sdiv <vscale x 2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV4i64 = sdiv <vscale x 4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV8i64 = sdiv <vscale x 8 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV2i32 = sdiv <vscale x 2 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV4i32 = sdiv <vscale x 4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV8i32 = sdiv <vscale x 8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV16i32 = sdiv <vscale x 16 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV2i16 = sdiv <vscale x 2 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV4i16 = sdiv <vscale x 4 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV8i16 = sdiv <vscale x 8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV16i16 = sdiv <vscale x 16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV32i16 = sdiv <vscale x 32 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV2i8 = sdiv <vscale x 2 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV4i8 = sdiv <vscale x 4 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV8i8 = sdiv <vscale x 8 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV16i8 = sdiv <vscale x 16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV32i8 = sdiv <vscale x 32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV64i8 = sdiv <vscale x 64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
@@ -321,42 +321,42 @@ define void @udiv_uniformconst() {
 
 define void @sdiv_uniformconstpow2() {
 ; CHECK-LABEL: 'sdiv_uniformconstpow2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i16 = sdiv <2 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V4i16 = sdiv <4 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 204 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i8 = sdiv <2 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 396 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV2i64 = sdiv <vscale x 2 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV4i64 = sdiv <vscale x 4 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV8i64 = sdiv <vscale x 8 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV2i32 = sdiv <vscale x 2 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV4i32 = sdiv <vscale x 4 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV8i32 = sdiv <vscale x 8 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV16i32 = sdiv <vscale x 16 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV2i16 = sdiv <vscale x 2 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV4i16 = sdiv <vscale x 4 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV8i16 = sdiv <vscale x 8 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV16i16 = sdiv <vscale x 16 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV32i16 = sdiv <vscale x 32 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV2i8 = sdiv <vscale x 2 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV4i8 = sdiv <vscale x 4 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV8i8 = sdiv <vscale x 8 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV16i8 = sdiv <vscale x 16 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV32i8 = sdiv <vscale x 32 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV64i8 = sdiv <vscale x 64 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i16 = sdiv <2 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i16 = sdiv <4 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i8 = sdiv <2 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i64 = sdiv <vscale x 2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i64 = sdiv <vscale x 4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i64 = sdiv <vscale x 8 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i32 = sdiv <vscale x 2 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i32 = sdiv <vscale x 4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV8i32 = sdiv <vscale x 8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV16i32 = sdiv <vscale x 16 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i16 = sdiv <vscale x 2 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i16 = sdiv <vscale x 4 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV8i16 = sdiv <vscale x 8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV16i16 = sdiv <vscale x 16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV32i16 = sdiv <vscale x 32 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i8 = sdiv <vscale x 2 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i8 = sdiv <vscale x 4 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV8i8 = sdiv <vscale x 8 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV16i8 = sdiv <vscale x 16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV32i8 = sdiv <vscale x 32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV64i8 = sdiv <vscale x 64 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
@@ -479,42 +479,42 @@ define void @udiv_uniformconstpow2() {
 
 define void @sdiv_uniformconstnegpow2() {
 ; CHECK-LABEL: 'sdiv_uniformconstnegpow2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i16 = sdiv <2 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i16 = sdiv <4 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i8 = sdiv <2 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i64 = sdiv <vscale x 2 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i64 = sdiv <vscale x 4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i64 = sdiv <vscale x 8 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i32 = sdiv <vscale x 2 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i32 = sdiv <vscale x 4 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV8i32 = sdiv <vscale x 8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV16i32 = sdiv <vscale x 16 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i16 = sdiv <vscale x 2 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i16 = sdiv <vscale x 4 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i16 = sdiv <vscale x 8 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV16i16 = sdiv <vscale x 16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %NV32i16 = sdiv <vscale x 32 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i8 = sdiv <vscale x 2 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i8 = sdiv <vscale x 4 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i8 = sdiv <vscale x 8 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV16i8 = sdiv <vscale x 16 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %NV32i8 = sdiv <vscale x 32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %NV64i8 = sdiv <vscale x 64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i32 = sdiv <2 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i16 = sdiv <2 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i16 = sdiv <4 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i8 = sdiv <2 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i8 = sdiv <4 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i8 = sdiv <8 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV2i64 = sdiv <vscale x 2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV4i64 = sdiv <vscale x 4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV8i64 = sdiv <vscale x 8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV2i32 = sdiv <vscale x 2 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV4i32 = sdiv <vscale x 4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV8i32 = sdiv <vscale x 8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV16i32 = sdiv <vscale x 16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV2i16 = sdiv <vscale x 2 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV4i16 = sdiv <vscale x 4 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV8i16 = sdiv <vscale x 8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV16i16 = sdiv <vscale x 16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV32i16 = sdiv <vscale x 32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV2i8 = sdiv <vscale x 2 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV4i8 = sdiv <vscale x 4 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV8i8 = sdiv <vscale x 8 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV16i8 = sdiv <vscale x 16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV32i8 = sdiv <vscale x 32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV64i8 = sdiv <vscale x 64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/sve-rem.ll b/llvm/test/Analysis/CostModel/AArch64/sve-rem.ll
index d18ed3296585b..ed88f1b390b68 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-rem.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-rem.ll
@@ -163,42 +163,42 @@ define void @urem() {
 
 define void @srem_uniformconst() {
 ; CHECK-LABEL: 'srem_uniformconst'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = srem <2 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = srem <2 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i16 = srem <4 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i8 = srem <2 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4i8 = srem <4 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i8 = srem <8 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i64 = srem <vscale x 2 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV4i64 = srem <vscale x 4 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV8i64 = srem <vscale x 8 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i32 = srem <vscale x 2 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i32 = srem <vscale x 4 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i32 = srem <vscale x 8 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV16i32 = srem <vscale x 16 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i16 = srem <vscale x 2 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i16 = srem <vscale x 4 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV8i16 = srem <vscale x 8 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV16i16 = srem <vscale x 16 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %NV32i16 = srem <vscale x 32 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i8 = srem <vscale x 2 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i8 = srem <vscale x 4 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV8i8 = srem <vscale x 8 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NV16i8 = srem <vscale x 16 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %NV32i8 = srem <vscale x 32 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %NV64i8 = srem <vscale x 64 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i32 = srem <2 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i16 = srem <2 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i16 = srem <4 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i8 = srem <2 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i8 = srem <4 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i8 = srem <8 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV2i64 = srem <vscale x 2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV4i64 = srem <vscale x 4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV8i64 = srem <vscale x 8 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV2i32 = srem <vscale x 2 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV4i32 = srem <vscale x 4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV8i32 = srem <vscale x 8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV16i32 = srem <vscale x 16 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV2i16 = srem <vscale x 2 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV4i16 = srem <vscale x 4 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV8i16 = srem <vscale x 8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV16i16 = srem <vscale x 16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV32i16 = srem <vscale x 32 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV2i8 = srem <vscale x 2 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV4i8 = srem <vscale x 4 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV8i8 = srem <vscale x 8 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV16i8 = srem <vscale x 16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV32i8 = srem <vscale x 32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV64i8 = srem <vscale x 64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %V2i64 = srem <2 x i64> undef, splat (i64 7)
@@ -321,42 +321,42 @@ define void @urem_uniformconst() {
 
 define void @srem_uniformconstpow2() {
 ; CHECK-LABEL: 'srem_uniformconstpow2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V2i32 = srem <2 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 116 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V2i16 = srem <2 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V4i16 = srem <4 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 106 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 212 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V2i8 = srem <2 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V4i8 = srem <4 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %V8i8 = srem <8 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 101 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 202 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 404 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NV2i64 = srem <vscale x 2 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %NV4i64 = srem <vscale x 4 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %NV8i64 = srem <vscale x 8 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NV2i32 = srem <vscale x 2 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NV4i32 = srem <vscale x 4 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %NV8i32 = srem <vscale x 8 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %NV16i32 = srem <vscale x 16 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NV2i16 = srem <vscale x 2 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NV4i16 = srem <vscale x 4 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NV8i16 = srem <vscale x 8 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %NV16i16 = srem <vscale x 16 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %NV32i16 = srem <vscale x 32 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NV2i8 = srem <vscale x 2 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NV4i8 = srem <vscale x 4 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NV8i8 = srem <vscale x 8 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NV16i8 = srem <vscale x 16 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %NV32i8 = srem <vscale x 32 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %NV64i8 = srem <vscale x 64 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i32 = srem <2 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i16 = srem <2 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i16 = srem <4 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i8 = srem <2 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i8 = srem <4 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i8 = srem <8 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i64 = srem <vscale x 2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i64 = srem <vscale x 4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i64 = srem <vscale x 8 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i32 = srem <vscale x 2 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i32 = srem <vscale x 4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV8i32 = srem <vscale x 8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV16i32 = srem <vscale x 16 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i16 = srem <vscale x 2 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i16 = srem <vscale x 4 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV8i16 = srem <vscale x 8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV16i16 = srem <vscale x 16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV32i16 = srem <vscale x 32 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i8 = srem <vscale x 2 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i8 = srem <vscale x 4 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV8i8 = srem <vscale x 8 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV16i8 = srem <vscale x 16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV32i8 = srem <vscale x 32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV64i8 = srem <vscale x 64 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %V2i64 = srem <2 x i64> undef, splat (i64 16)
@@ -479,42 +479,42 @@ define void @urem_uniformconstpow2() {
 
 define void @srem_uniformconstnegpow2() {
 ; CHECK-LABEL: 'srem_uniformconstnegpow2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = srem <2 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = srem <2 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i16 = srem <4 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i8 = srem <2 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4i8 = srem <4 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i8 = srem <8 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i64 = srem <vscale x 2 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV4i64 = srem <vscale x 4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV8i64 = srem <vscale x 8 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i32 = srem <vscale x 2 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i32 = srem <vscale x 4 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i32 = srem <vscale x 8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV16i32 = srem <vscale x 16 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i16 = srem <vscale x 2 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i16 = srem <vscale x 4 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV8i16 = srem <vscale x 8 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV16i16 = srem <vscale x 16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %NV32i16 = srem <vscale x 32 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i8 = srem <vscale x 2 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i8 = srem <vscale x 4 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV8i8 = srem <vscale x 8 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NV16i8 = srem <vscale x 16 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %NV32i8 = srem <vscale x 32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %NV64i8 = srem <vscale x 64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i32 = srem <2 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i16 = srem <2 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i16 = srem <4 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i8 = srem <2 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i8 = srem <4 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i8 = srem <8 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV2i64 = srem <vscale x 2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV4i64 = srem <vscale x 4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV8i64 = srem <vscale x 8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV2i32 = srem <vscale x 2 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV4i32 = srem <vscale x 4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV8i32 = srem <vscale x 8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV16i32 = srem <vscale x 16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV2i16 = srem <vscale x 2 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV4i16 = srem <vscale x 4 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV8i16 = srem <vscale x 8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV16i16 = srem <vscale x 16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV32i16 = srem <vscale x 32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV2i8 = srem <vscale x 2 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV4i8 = srem <vscale x 4 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV8i8 = srem <vscale x 8 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV16i8 = srem <vscale x 16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV32i8 = srem <vscale x 32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV64i8 = srem <vscale x 64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %V2i64 = srem <2 x i64> undef, splat (i64 -16)

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
index 254cdf2d14d9f..a07da0744fcf7 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
@@ -428,10 +428,10 @@ define void @test_blend_feeding_replicated_store_3(ptr noalias %src.1, ptr noali
 ; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
 ; CHECK:       [[LOOP_HEADER]]:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT:    [[L_1:%.*]] = load i8, ptr [[SRC_1]], align 1
-; CHECK-NEXT:    [[EXT:%.*]] = zext i8 [[L_1]] to i32
+; CHECK-NEXT:    [[L_3:%.*]] = load i8, ptr [[SRC_1]], align 1
+; CHECK-NEXT:    [[EXT:%.*]] = zext i8 [[L_3]] to i32
 ; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[X]], [[EXT]]
-; CHECK-NEXT:    [[DIV:%.*]] = sdiv i32 [[MUL]], 255
+; CHECK-NEXT:    [[DIV:%.*]] = sdiv i32 [[MUL]], [[EXT]]
 ; CHECK-NEXT:    [[L_2:%.*]] = load i8, ptr [[SRC_2]], align 1
 ; CHECK-NEXT:    [[C_1:%.*]] = icmp eq i8 [[L_2]], 0
 ; CHECK-NEXT:    br i1 [[C_1]], label %[[THEN:.*]], label %[[ELSE_1:.*]]
@@ -459,7 +459,7 @@ loop.header:
   %l.1 = load i8, ptr %src.1, align 1
   %ext = zext i8 %l.1 to i32
   %mul = mul i32 %x, %ext
-  %div = sdiv i32 %mul, 255
+  %div = sdiv i32 %mul, %ext
   %l.2 = load i8, ptr %src.2, align 1
   %c.1 = icmp eq i8 %l.2, 0
   br i1 %c.1, label %then, label %else.1

diff  --git a/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll
index bb88edff11634..e9cf1deac8eed 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll
@@ -433,26 +433,10 @@ define <8 x i16> @slp_v8i16_Op1_Op2_unknown(<8 x i16> %a, <8 x i16> %b)
 }
 
 define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const(<4 x i32> %a)
-; NO-SVE-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const(
-; NO-SVE-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
-; NO-SVE-NEXT:    [[A0:%.*]] = extractelement <4 x i32> [[A]], i32 0
-; NO-SVE-NEXT:    [[A1:%.*]] = extractelement <4 x i32> [[A]], i32 1
-; NO-SVE-NEXT:    [[A2:%.*]] = extractelement <4 x i32> [[A]], i32 2
-; NO-SVE-NEXT:    [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3
-; NO-SVE-NEXT:    [[TMP1:%.*]] = sdiv i32 [[A0]], 1
-; NO-SVE-NEXT:    [[TMP2:%.*]] = sdiv i32 [[A1]], 3
-; NO-SVE-NEXT:    [[TMP3:%.*]] = sdiv i32 [[A2]], 5
-; NO-SVE-NEXT:    [[TMP4:%.*]] = sdiv i32 [[A3]], 7
-; NO-SVE-NEXT:    [[R0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
-; NO-SVE-NEXT:    [[R1:%.*]] = insertelement <4 x i32> [[R0]], i32 [[TMP2]], i32 1
-; NO-SVE-NEXT:    [[R2:%.*]] = insertelement <4 x i32> [[R1]], i32 [[TMP3]], i32 2
-; NO-SVE-NEXT:    [[R3:%.*]] = insertelement <4 x i32> [[R2]], i32 [[TMP4]], i32 3
-; NO-SVE-NEXT:    ret <4 x i32> [[R3]]
-;
-; SVE-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const(
-; SVE-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
-; SVE-NEXT:    [[TMP1:%.*]] = sdiv <4 x i32> [[A]], <i32 1, i32 3, i32 5, i32 7>
-; SVE-NEXT:    ret <4 x i32> [[TMP1]]
+; CHECK-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const(
+; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = sdiv <4 x i32> [[A]], <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
 {
   %a0 = extractelement <4 x i32> %a, i32 0
@@ -472,7 +456,7 @@ define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const(<4 x i32> %a)
 
 define <4 x i32> @slp_v4i32_Op1_unknown_Op2_uniform_const(<4 x i32> %a)
 ; CHECK-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_uniform_const(
-; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = sdiv <4 x i32> [[A]], splat (i32 5)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
@@ -515,26 +499,10 @@ define <4 x i32> @slp_v4i32_Op1_unknown_Op2_uniform_const_pow2(<4 x i32> %a)
 }
 
 define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const_pow2(<4 x i32> %a)
-; NO-SVE-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const_pow2(
-; NO-SVE-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
-; NO-SVE-NEXT:    [[A0:%.*]] = extractelement <4 x i32> [[A]], i32 0
-; NO-SVE-NEXT:    [[A1:%.*]] = extractelement <4 x i32> [[A]], i32 1
-; NO-SVE-NEXT:    [[A2:%.*]] = extractelement <4 x i32> [[A]], i32 2
-; NO-SVE-NEXT:    [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3
-; NO-SVE-NEXT:    [[TMP1:%.*]] = sdiv i32 [[A0]], 1
-; NO-SVE-NEXT:    [[TMP2:%.*]] = sdiv i32 [[A1]], 2
-; NO-SVE-NEXT:    [[TMP3:%.*]] = sdiv i32 [[A2]], 4
-; NO-SVE-NEXT:    [[TMP4:%.*]] = sdiv i32 [[A3]], 8
-; NO-SVE-NEXT:    [[R0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
-; NO-SVE-NEXT:    [[R1:%.*]] = insertelement <4 x i32> [[R0]], i32 [[TMP2]], i32 1
-; NO-SVE-NEXT:    [[R2:%.*]] = insertelement <4 x i32> [[R1]], i32 [[TMP3]], i32 2
-; NO-SVE-NEXT:    [[R3:%.*]] = insertelement <4 x i32> [[R2]], i32 [[TMP4]], i32 3
-; NO-SVE-NEXT:    ret <4 x i32> [[R3]]
-;
-; SVE-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const_pow2(
-; SVE-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
-; SVE-NEXT:    [[TMP1:%.*]] = sdiv <4 x i32> [[A]], <i32 1, i32 2, i32 4, i32 8>
-; SVE-NEXT:    ret <4 x i32> [[TMP1]]
+; CHECK-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const_pow2(
+; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = sdiv <4 x i32> [[A]], <i32 1, i32 2, i32 4, i32 8>
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
 {
   %a0 = extractelement <4 x i32> %a, i32 0


        


More information about the llvm-commits mailing list