[llvm] [AArch64] Improve urem by constant costs (PR #122236)

David Green via llvm-commits llvm-commits at lists.llvm.org
Sat Feb 1 05:39:24 PST 2025


https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/122236

>From fb512c9b63c41433e627b104df729683222e9e00 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 10 Jan 2025 15:05:44 +0000
Subject: [PATCH 1/2] [TTI][AArch64] Detect OperandInfo from scalable splats.

Pulled out of #122236, this allows Splats contants to be recognized in by
getOperandInfo, allowing "better" costs for instructions like divides by
constants to be produced (which are expanded into mul+add+shift). Some of the
costs are not very accurate yet, but the comparison of scalar vs fixed-with vs
scalable for the same fiv can become more accurate, especially with patches
like #122236.
---
 llvm/lib/Analysis/TargetTransformInfo.cpp     | 43 ++++++++++---------
 .../AArch64/AArch64TargetTransformInfo.cpp    |  3 +-
 .../Analysis/CostModel/AArch64/sve-div.ll     | 36 ++++++++--------
 .../CostModel/AArch64/sve-intrinsics.ll       | 32 +++++++-------
 .../Analysis/CostModel/AArch64/sve-rem.ll     | 36 ++++++++--------
 .../CostModel/RISCV/rvv-load-store.ll         |  8 ++--
 .../AArch64/simple_early_exit.ll              | 42 ++++++++++++------
 7 files changed, 109 insertions(+), 91 deletions(-)

diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 424bb7be233836..ab223900aa9f37 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -902,9 +902,12 @@ TargetTransformInfo::getOperandInfo(const Value *V) {
 
   // Check for a splat of a constant or for a non uniform vector of constants
   // and check if the constant(s) are all powers of two.
-  if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
-    OpInfo = OK_NonUniformConstantValue;
-    if (Splat) {
+  if (Splat) {
+    // Check for a splat of a uniform value. This is not loop aware, so return
+    // true only for the obviously uniform cases (argument, globalvalue)
+    if (isa<Argument>(Splat) || isa<GlobalValue>(Splat)) {
+      OpInfo = OK_UniformValue;
+    } else if (isa<Constant>(Splat)) {
       OpInfo = OK_UniformConstantValue;
       if (auto *CI = dyn_cast<ConstantInt>(Splat)) {
         if (CI->getValue().isPowerOf2())
@@ -912,28 +915,26 @@ TargetTransformInfo::getOperandInfo(const Value *V) {
         else if (CI->getValue().isNegatedPowerOf2())
           OpProps = OP_NegatedPowerOf2;
       }
-    } else if (const auto *CDS = dyn_cast<ConstantDataSequential>(V)) {
-      bool AllPow2 = true, AllNegPow2 = true;
-      for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
-        if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I))) {
-          AllPow2 &= CI->getValue().isPowerOf2();
-          AllNegPow2 &= CI->getValue().isNegatedPowerOf2();
-          if (AllPow2 || AllNegPow2)
-            continue;
-        }
-        AllPow2 = AllNegPow2 = false;
-        break;
+    }
+  } else if (const auto *CDS = dyn_cast<ConstantDataSequential>(V)) {
+    OpInfo = OK_NonUniformConstantValue;
+    bool AllPow2 = true, AllNegPow2 = true;
+    for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
+      if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I))) {
+        AllPow2 &= CI->getValue().isPowerOf2();
+        AllNegPow2 &= CI->getValue().isNegatedPowerOf2();
+        if (AllPow2 || AllNegPow2)
+          continue;
       }
-      OpProps = AllPow2 ? OP_PowerOf2 : OpProps;
-      OpProps = AllNegPow2 ? OP_NegatedPowerOf2 : OpProps;
+      AllPow2 = AllNegPow2 = false;
+      break;
     }
+    OpProps = AllPow2 ? OP_PowerOf2 : OpProps;
+    OpProps = AllNegPow2 ? OP_NegatedPowerOf2 : OpProps;
+  } else if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
+    OpInfo = OK_NonUniformConstantValue;
   }
 
-  // Check for a splat of a uniform value. This is not loop aware, so return
-  // true only for the obviously uniform cases (argument, globalvalue)
-  if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
-    OpInfo = OK_UniformValue;
-
   return {OpInfo, OpProps};
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index aae2fdaf5bec37..6c5fe0b679041d 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -3522,7 +3522,8 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
   case ISD::UDIV: {
     auto VT = TLI->getValueType(DL, Ty);
     if (Op2Info.isConstant() && Op2Info.isUniform()) {
-      if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT)) {
+      if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT) &&
+          !VT.isScalableVector()) {
         // Vector signed division by constant are expanded to the
         // sequence MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division
         // to MULHS + SUB + SRL + ADD + SRL.
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-div.ll b/llvm/test/Analysis/CostModel/AArch64/sve-div.ll
index 4c25e3003177d9..33bc4ebe3c3edf 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-div.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-div.ll
@@ -339,24 +339,24 @@ define void @sdiv_uniformconstpow2() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 396 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i64 = sdiv <vscale x 2 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i64 = sdiv <vscale x 4 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i64 = sdiv <vscale x 8 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i32 = sdiv <vscale x 2 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i32 = sdiv <vscale x 4 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV8i32 = sdiv <vscale x 8 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV16i32 = sdiv <vscale x 16 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i16 = sdiv <vscale x 2 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i16 = sdiv <vscale x 4 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i16 = sdiv <vscale x 8 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV16i16 = sdiv <vscale x 16 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %NV32i16 = sdiv <vscale x 32 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i8 = sdiv <vscale x 2 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i8 = sdiv <vscale x 4 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i8 = sdiv <vscale x 8 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV16i8 = sdiv <vscale x 16 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %NV32i8 = sdiv <vscale x 32 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %NV64i8 = sdiv <vscale x 64 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV2i64 = sdiv <vscale x 2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV4i64 = sdiv <vscale x 4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV8i64 = sdiv <vscale x 8 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV2i32 = sdiv <vscale x 2 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV4i32 = sdiv <vscale x 4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV8i32 = sdiv <vscale x 8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV16i32 = sdiv <vscale x 16 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV2i16 = sdiv <vscale x 2 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV4i16 = sdiv <vscale x 4 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV8i16 = sdiv <vscale x 8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV16i16 = sdiv <vscale x 16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV32i16 = sdiv <vscale x 32 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV2i8 = sdiv <vscale x 2 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV4i8 = sdiv <vscale x 4 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV8i8 = sdiv <vscale x 8 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NV16i8 = sdiv <vscale x 16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV32i8 = sdiv <vscale x 32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV64i8 = sdiv <vscale x 64 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
index 696dec91d93d20..78a0ccfde88c06 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -1011,17 +1011,17 @@ define void @get_lane_mask() #0 {
 
 define void @fshr() #0 {
 ; CHECK-VSCALE-1-LABEL: 'fshr'
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-VSCALE-2-LABEL: 'fshr'
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; TYPE_BASED_ONLY-LABEL: 'fshr'
@@ -1040,17 +1040,17 @@ define void @fshr() #0 {
 
 define void @fshl() #0 {
 ; CHECK-VSCALE-1-LABEL: 'fshl'
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-VSCALE-2-LABEL: 'fshl'
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; TYPE_BASED_ONLY-LABEL: 'fshl'
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-rem.ll b/llvm/test/Analysis/CostModel/AArch64/sve-rem.ll
index 10455c06b3f456..c1591fe5fde331 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-rem.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-rem.ll
@@ -339,24 +339,24 @@ define void @srem_uniformconstpow2() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 101 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 202 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 404 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i64 = srem <vscale x 2 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV4i64 = srem <vscale x 4 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV8i64 = srem <vscale x 8 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i32 = srem <vscale x 2 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i32 = srem <vscale x 4 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i32 = srem <vscale x 8 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV16i32 = srem <vscale x 16 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i16 = srem <vscale x 2 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i16 = srem <vscale x 4 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV8i16 = srem <vscale x 8 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV16i16 = srem <vscale x 16 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %NV32i16 = srem <vscale x 32 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i8 = srem <vscale x 2 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i8 = srem <vscale x 4 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV8i8 = srem <vscale x 8 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NV16i8 = srem <vscale x 16 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %NV32i8 = srem <vscale x 32 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %NV64i8 = srem <vscale x 64 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NV2i64 = srem <vscale x 2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %NV4i64 = srem <vscale x 4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %NV8i64 = srem <vscale x 8 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NV2i32 = srem <vscale x 2 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NV4i32 = srem <vscale x 4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %NV8i32 = srem <vscale x 8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %NV16i32 = srem <vscale x 16 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NV2i16 = srem <vscale x 2 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NV4i16 = srem <vscale x 4 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NV8i16 = srem <vscale x 8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %NV16i16 = srem <vscale x 16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %NV32i16 = srem <vscale x 32 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NV2i8 = srem <vscale x 2 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NV4i8 = srem <vscale x 4 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NV8i8 = srem <vscale x 8 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NV16i8 = srem <vscale x 16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %NV32i8 = srem <vscale x 32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %NV64i8 = srem <vscale x 64 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %V2i64 = srem <2 x i64> undef, splat (i64 16)
diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-load-store.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-load-store.ll
index 6fc98da0eea97e..7bb1efedc40bf5 100644
--- a/llvm/test/Analysis/CostModel/RISCV/rvv-load-store.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/rvv-load-store.ll
@@ -512,8 +512,8 @@ define void @store_of_constant(ptr %p) {
 ; CHECK-LABEL: 'store_of_constant'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> poison, ptr %p, align 16
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr %p, align 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> zeroinitializer, ptr %p, align 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> zeroinitializer, ptr %p, align 32
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> zeroinitializer, ptr %p, align 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <4 x i64> zeroinitializer, ptr %p, align 32
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> splat (i32 1), ptr %p, align 16
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <4 x i64> splat (i64 1), ptr %p, align 32
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> splat (i32 4096), ptr %p, align 16
@@ -530,8 +530,8 @@ define void @store_of_constant(ptr %p) {
 ; CODESIZE-LABEL: 'store_of_constant'
 ; CODESIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> poison, ptr %p, align 16
 ; CODESIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr %p, align 16
-; CODESIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> zeroinitializer, ptr %p, align 16
-; CODESIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> zeroinitializer, ptr %p, align 32
+; CODESIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> zeroinitializer, ptr %p, align 16
+; CODESIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> zeroinitializer, ptr %p, align 32
 ; CODESIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> splat (i32 1), ptr %p, align 16
 ; CODESIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> splat (i64 1), ptr %p, align 32
 ; CODESIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> splat (i32 4096), ptr %p, align 16
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
index 9d21ea0ab6de39..39f93c290ed17f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
@@ -272,36 +272,52 @@ define i64 @loop_contains_safe_div() #1 {
 ; CHECK-NEXT:    [[P2:%.*]] = alloca [1024 x i8], align 4
 ; CHECK-NEXT:    call void @init_mem(ptr [[P1]], i64 1024)
 ; CHECK-NEXT:    call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP11:%.*]] = mul i64 [[TMP10]], 4
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP12]], 4
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 64, [[TMP3]]
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; CHECK-NEXT:    [[TMP16:%.*]] = add i64 3, [[N_VEC]]
+; CHECK-NEXT:    [[TMP17:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
+; CHECK-NEXT:    [[TMP18:%.*]] = mul <vscale x 4 x i64> [[TMP17]], splat (i64 1)
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <vscale x 4 x i64> splat (i64 3), [[TMP18]]
+; CHECK-NEXT:    [[TMP9:%.*]] = mul i64 1, [[TMP5]]
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 3, i64 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 1
-; CHECK-NEXT:    [[TMP3:%.*]] = udiv <2 x i32> [[WIDE_LOAD]], splat (i32 20000)
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <2 x i32> [[TMP3]], splat (i32 1)
-; CHECK-NEXT:    [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 2
-; CHECK-NEXT:    [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true)
-; CHECK-NEXT:    [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP5]])
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP2]], align 1
+; CHECK-NEXT:    [[TMP13:%.*]] = udiv <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 20000)
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq <vscale x 4 x i32> [[TMP13]], splat (i32 1)
+; CHECK-NEXT:    [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP5]]
+; CHECK-NEXT:    [[TMP15:%.*]] = xor <vscale x 4 x i1> [[TMP14]], splat (i1 true)
+; CHECK-NEXT:    [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP15]])
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]]
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       middle.split:
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
 ; CHECK:       vector.early.exit:
-; CHECK-NEXT:    [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v2i1(<2 x i1> [[TMP5]], i1 true)
-; CHECK-NEXT:    [[EARLY_EXIT_VALUE:%.*]] = extractelement <2 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT:    [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> [[TMP15]], i1 true)
+; CHECK-NEXT:    [[EARLY_EXIT_VALUE:%.*]] = extractelement <vscale x 4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
 ; CHECK-NEXT:    br label [[LOOP_END:%.*]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    br i1 true, label [[LOOP_END]], label [[SCALAR_PH]]
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_END]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP16]], [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]

>From b0c15cd461e1ac9c9220910c9a35fb17ffebade4 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Sat, 1 Feb 2025 13:37:45 +0000
Subject: [PATCH 2/2] [AArch64] Improve urem by constant costs

A urem by a constant, much like a udiv by a constant, can be expanded into a
series of mul/add/shift instructions. The exact sequence of instructions
depends on the constants and the types.

If the constant is a power-2 then a shift / and will be used, so the cost will
be 1. This canonicalization happens relatively early so this likely has very
little effect in practice (it does help the cost of funnel shifts).

For a non-power 2 the code for div will expand to a series of UMULH + Add +
Shift + Add, depending on the constant. urem is generally udiv + mul + sub, so
involves a few extra instructions. The UMULH is not always available, i32 will
use umull+shift, and vector types will use umull+shift or umull+umull2+uzp
depending on the vector size. v2i64 will be scalarized because there is no mul
available. SVE does have a UMULH instruction.

The end result is that the costs should be closer to reality, with scalable
types a little lower cost than the fixed-width versions. (In the future we
might be able to use umulh for fixed-width when the SVE instruction is
available, but for the moment this should favour scalable vectorization a
little).

I've tried to make this patch only apply to constant UREM/UDIV instructions.
SDIV and SREM are left until a later patch to prevent this becoming too
complex. The funnel shift costs are changing as it believes it will need a urem
to clamp the shift amount, which should be a power-2 value for most common
types.
---
 .../AArch64/AArch64TargetTransformInfo.cpp    |  62 +++-
 llvm/test/Analysis/CostModel/AArch64/div.ll   | 256 ++++++++---------
 .../Analysis/CostModel/AArch64/div_cte.ll     |   6 +-
 llvm/test/Analysis/CostModel/AArch64/fshl.ll  |  16 +-
 llvm/test/Analysis/CostModel/AArch64/fshr.ll  |  16 +-
 llvm/test/Analysis/CostModel/AArch64/rem.ll   | 270 +++++++++---------
 .../Analysis/CostModel/AArch64/sve-div.ll     | 218 +++++++-------
 .../CostModel/AArch64/sve-intrinsics.ll       |  32 +--
 .../Analysis/CostModel/AArch64/sve-rem.ll     | 220 +++++++-------
 9 files changed, 567 insertions(+), 529 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 6c5fe0b679041d..94be8f0df6756e 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -3519,20 +3519,58 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
       return Cost;
     }
     [[fallthrough]];
-  case ISD::UDIV: {
+  case ISD::UDIV:
+  case ISD::UREM: {
     auto VT = TLI->getValueType(DL, Ty);
-    if (Op2Info.isConstant() && Op2Info.isUniform()) {
+    if (Op2Info.isConstant()) {
+      // If the operand is a power of 2 we can use the shift or and cost.
+      if (ISD == ISD::UDIV && Op2Info.isPowerOf2())
+        return getArithmeticInstrCost(Instruction::LShr, Ty, CostKind,
+                                      Op1Info.getNoProps(),
+                                      Op2Info.getNoProps());
+      if (ISD == ISD::UREM && Op2Info.isPowerOf2())
+        return getArithmeticInstrCost(Instruction::And, Ty, CostKind,
+                                      Op1Info.getNoProps(),
+                                      Op2Info.getNoProps());
+
+      if (ISD == ISD::UDIV || ISD == ISD::UREM) {
+        // Divides by a constant are expanded to MULHU + SUB + SRL + ADD + SRL.
+        // The MULHU will be expanded to UMULL for the types not listed below,
+        // and will become a pair of UMULL+MULL2 for 128bit vectors.
+        bool HasMULH = VT == MVT::i64 || LT.second == MVT::nxv2i64 ||
+                       LT.second == MVT::nxv4i32 || LT.second == MVT::nxv8i16 ||
+                       LT.second == MVT::nxv16i8;
+        bool Is128bit = LT.second.is128BitVector();
+
+        InstructionCost MulCost =
+            getArithmeticInstrCost(Instruction::Mul, Ty, CostKind,
+                                   Op1Info.getNoProps(), Op2Info.getNoProps());
+        InstructionCost AddCost =
+            getArithmeticInstrCost(Instruction::Add, Ty, CostKind,
+                                   Op1Info.getNoProps(), Op2Info.getNoProps());
+        InstructionCost ShrCost =
+            getArithmeticInstrCost(Instruction::AShr, Ty, CostKind,
+                                   Op1Info.getNoProps(), Op2Info.getNoProps());
+        InstructionCost DivCost = MulCost * (Is128bit ? 2 : 1) + // UMULL/UMULH
+                                  (HasMULH ? 0 : ShrCost) +      // UMULL shift
+                                  AddCost * 2 + ShrCost;
+        return DivCost + (ISD == ISD::UREM ? MulCost + AddCost : 0);
+      }
+
+      // TODOD: Fix SDIV and SREM costs, similar to the above.
       if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT) &&
-          !VT.isScalableVector()) {
+          Op2Info.isUniform()) {
         // Vector signed division by constant are expanded to the
-        // sequence MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division
-        // to MULHS + SUB + SRL + ADD + SRL.
-        InstructionCost MulCost = getArithmeticInstrCost(
-            Instruction::Mul, Ty, CostKind, Op1Info.getNoProps(), Op2Info.getNoProps());
-        InstructionCost AddCost = getArithmeticInstrCost(
-            Instruction::Add, Ty, CostKind, Op1Info.getNoProps(), Op2Info.getNoProps());
-        InstructionCost ShrCost = getArithmeticInstrCost(
-            Instruction::AShr, Ty, CostKind, Op1Info.getNoProps(), Op2Info.getNoProps());
+        // sequence MULHS + ADD/SUB + SRA + SRL + ADD.
+        InstructionCost MulCost =
+            getArithmeticInstrCost(Instruction::Mul, Ty, CostKind,
+                                   Op1Info.getNoProps(), Op2Info.getNoProps());
+        InstructionCost AddCost =
+            getArithmeticInstrCost(Instruction::Add, Ty, CostKind,
+                                   Op1Info.getNoProps(), Op2Info.getNoProps());
+        InstructionCost ShrCost =
+            getArithmeticInstrCost(Instruction::AShr, Ty, CostKind,
+                                   Op1Info.getNoProps(), Op2Info.getNoProps());
         return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
       }
     }
@@ -3545,7 +3583,7 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
 
     InstructionCost Cost = BaseT::getArithmeticInstrCost(
         Opcode, Ty, CostKind, Op1Info, Op2Info);
-    if (Ty->isVectorTy()) {
+    if (Ty->isVectorTy() && (ISD == ISD::SDIV || ISD == ISD::UDIV)) {
       if (TLI->isOperationLegalOrCustom(ISD, LT.second) && ST->hasSVE()) {
         // SDIV/UDIV operations are lowered using SVE, then we can have less
         // costs.
diff --git a/llvm/test/Analysis/CostModel/AArch64/div.ll b/llvm/test/Analysis/CostModel/AArch64/div.ll
index ef52d0db01eefd..fcf567f37b665a 100644
--- a/llvm/test/Analysis/CostModel/AArch64/div.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/div.ll
@@ -180,28 +180,28 @@ define i32 @sdiv_const() {
 define i32 @udiv_const() {
 ; CHECK-LABEL: 'udiv_const'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = udiv i128 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = udiv i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 6, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V2i32 = udiv <2 x i32> undef, <i32 4, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V2i16 = udiv <2 x i16> undef, <i16 4, i16 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i16 = udiv <4 x i16> undef, <i16 4, i16 5, i16 6, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 176 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 352 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V2i8 = udiv <2 x i8> undef, <i8 4, i8 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i8 = udiv <4 x i8> undef, <i8 4, i8 5, i8 6, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i8 = udiv <8 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 168 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 336 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 672 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 6, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I32 = udiv i32 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i32 = udiv <2 x i32> undef, <i32 4, i32 5>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = udiv i16 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i16 = udiv <2 x i16> undef, <i16 4, i16 5>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i16 = udiv <4 x i16> undef, <i16 4, i16 5, i16 6, i16 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = udiv i8 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i8 = udiv <2 x i8> undef, <i8 4, i8 5>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i8 = udiv <4 x i8> undef, <i8 4, i8 5, i8 6, i8 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i8 = udiv <8 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 
@@ -297,28 +297,28 @@ define i32 @sdiv_uniformconst() {
 define i32 @udiv_uniformconst() {
 ; CHECK-LABEL: 'udiv_uniformconst'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = udiv i128 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = udiv i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i32 = udiv <2 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i16 = udiv <2 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i16 = udiv <4 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i8 = udiv <2 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i8 = udiv <4 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i8 = udiv <8 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I32 = udiv i32 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i32 = udiv <2 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = udiv i16 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i16 = udiv <2 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i16 = udiv <4 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = udiv i8 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i8 = udiv <2 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i8 = udiv <4 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i8 = udiv <8 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I128 = udiv i128 undef, 7
@@ -412,29 +412,29 @@ define i32 @sdiv_constpow2() {
 
 define i32 @udiv_constpow2() {
 ; CHECK-LABEL: 'udiv_constpow2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = udiv i128 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = udiv i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 8, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I128 = udiv i128 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 8, i64 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V2i32 = udiv <2 x i32> undef, <i32 2, i32 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = udiv <2 x i32> undef, <i32 2, i32 4>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V2i16 = udiv <2 x i16> undef, <i16 2, i16 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i16 = udiv <4 x i16> undef, <i16 2, i16 4, i16 8, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 176 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 352 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = udiv <2 x i16> undef, <i16 2, i16 4>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = udiv <4 x i16> undef, <i16 2, i16 4, i16 8, i16 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V2i8 = udiv <2 x i8> undef, <i8 2, i8 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i8 = udiv <4 x i8> undef, <i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i8 = udiv <8 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 168 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 336 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 672 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = udiv <2 x i8> undef, <i8 2, i8 4>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = udiv <4 x i8> undef, <i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = udiv <8 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I128 = udiv i128 undef, 16
@@ -528,29 +528,29 @@ define i32 @sdiv_uniformconstpow2() {
 
 define i32 @udiv_uniformconstpow2() {
 ; CHECK-LABEL: 'udiv_uniformconstpow2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = udiv i128 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = udiv i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I128 = udiv i128 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i32 = udiv <2 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = udiv <2 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i16 = udiv <2 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i16 = udiv <4 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = udiv <2 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = udiv <4 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i8 = udiv <2 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i8 = udiv <4 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i8 = udiv <8 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = udiv <2 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = udiv <4 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = udiv <8 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I128 = udiv i128 undef, 16
@@ -645,28 +645,28 @@ define i32 @sdiv_constnegpow2() {
 define i32 @udiv_constnegpow2() {
 ; CHECK-LABEL: 'udiv_constnegpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = udiv i128 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = udiv i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 -8, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V2i32 = udiv <2 x i32> undef, <i32 -2, i32 -4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V2i16 = udiv <2 x i16> undef, <i16 -2, i16 -4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i16 = udiv <4 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 176 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 352 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V2i8 = udiv <2 x i8> undef, <i8 -2, i8 -4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i8 = udiv <4 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i8 = udiv <8 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 168 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 336 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 672 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 -8, i64 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I32 = udiv i32 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i32 = udiv <2 x i32> undef, <i32 -2, i32 -4>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = udiv i16 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i16 = udiv <2 x i16> undef, <i16 -2, i16 -4>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i16 = udiv <4 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = udiv i8 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i8 = udiv <2 x i8> undef, <i8 -2, i8 -4>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i8 = udiv <4 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i8 = udiv <8 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I128 = udiv i128 undef, -16
@@ -761,28 +761,28 @@ define i32 @sdiv_uniformconstnegpow2() {
 define i32 @udiv_uniformconstnegpow2() {
 ; CHECK-LABEL: 'udiv_uniformconstnegpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = udiv i128 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = udiv i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i32 = udiv <2 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i16 = udiv <2 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i16 = udiv <4 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i8 = udiv <2 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i8 = udiv <4 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i8 = udiv <8 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I32 = udiv i32 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i32 = udiv <2 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = udiv i16 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i16 = udiv <2 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i16 = udiv <4 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = udiv i8 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i8 = udiv <2 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i8 = udiv <4 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i8 = udiv <8 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I128 = udiv i128 undef, -16
diff --git a/llvm/test/Analysis/CostModel/AArch64/div_cte.ll b/llvm/test/Analysis/CostModel/AArch64/div_cte.ll
index dd93aed53c0f2a..68e287477042d9 100644
--- a/llvm/test/Analysis/CostModel/AArch64/div_cte.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/div_cte.ll
@@ -34,7 +34,7 @@ define <4 x i32> @sdiv32xi4(<4 x i32> %x) {
 
 define <16 x i8> @udiv8xi16(<16 x i8> %x) {
 ; CHECK-LABEL: 'udiv8xi16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <16 x i8> %x, splat (i8 9)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %div = udiv <16 x i8> %x, splat (i8 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %div
 ;
   %div = udiv <16 x i8> %x, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
@@ -43,7 +43,7 @@ define <16 x i8> @udiv8xi16(<16 x i8> %x) {
 
 define <8 x i16> @udiv16xi8(<8 x i16> %x) {
 ; CHECK-LABEL: 'udiv16xi8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <8 x i16> %x, splat (i16 9)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %div = udiv <8 x i16> %x, splat (i16 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %div
 ;
   %div = udiv <8 x i16> %x, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
@@ -52,7 +52,7 @@ define <8 x i16> @udiv16xi8(<8 x i16> %x) {
 
 define <4 x i32> @udiv32xi4(<4 x i32> %x) {
 ; CHECK-LABEL: 'udiv32xi4'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <4 x i32> %x, splat (i32 9)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %div = udiv <4 x i32> %x, splat (i32 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %div
 ;
   %div = udiv <4 x i32> %x, <i32 9, i32 9, i32 9, i32 9>
diff --git a/llvm/test/Analysis/CostModel/AArch64/fshl.ll b/llvm/test/Analysis/CostModel/AArch64/fshl.ll
index 632f26dfa53829..1ddc995366bccc 100644
--- a/llvm/test/Analysis/CostModel/AArch64/fshl.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/fshl.ll
@@ -15,7 +15,7 @@ entry:
 
 define i8 @fshl_i8_3rd_arg_var(i8 %a, i8 %b, i8 %c) {
 ; CHECK-LABEL: 'fshl_i8_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %fshl
 ;
 entry:
@@ -49,7 +49,7 @@ entry:
 
 define i32 @fshl_i32_3rd_arg_var(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: 'fshl_i32_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %fshl
 ;
 entry:
@@ -71,7 +71,7 @@ entry:
 
 define i64 @fshl_i64_3rd_arg_var(i64 %a, i64 %b, i64 %c) {
 ; CHECK-LABEL: 'fshl_i64_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %fshl
 ;
 entry:
@@ -116,7 +116,7 @@ entry:
 
 define <16 x i8> @fshl_v16i8_3rd_arg_var(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
 ; CHECK-LABEL: 'fshl_v16i8_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 118 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshl
 ;
 entry:
@@ -148,7 +148,7 @@ entry:
 
 define <8 x i16> @fshl_v8i16_3rd_arg_var(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
 ; CHECK-LABEL: 'fshl_v8i16_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshl
 ;
 entry:
@@ -180,7 +180,7 @@ entry:
 
 define <4 x i32> @fshl_v4i32_3rd_arg_var(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: 'fshl_v4i32_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshl
 ;
 entry:
@@ -212,7 +212,7 @@ entry:
 
 define <2 x i64> @fshl_v2i64_3rd_arg_var(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
 ; CHECK-LABEL: 'fshl_v2i64_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshl
 ;
 entry:
@@ -224,7 +224,7 @@ declare <2 x i64> @llvm.fshl.v4i64(<2 x i64>, <2 x i64>, <2 x i64>)
 
 define <4 x i30> @fshl_v4i30_3rd_arg_var(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c) {
 ; CHECK-LABEL: 'fshl_v4i30_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %fshl = tail call <4 x i30> @llvm.fshl.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %fshl = tail call <4 x i30> @llvm.fshl.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i30> %fshl
 ;
 entry:
diff --git a/llvm/test/Analysis/CostModel/AArch64/fshr.ll b/llvm/test/Analysis/CostModel/AArch64/fshr.ll
index a0a579ae96a9ba..438690f76f019e 100644
--- a/llvm/test/Analysis/CostModel/AArch64/fshr.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/fshr.ll
@@ -15,7 +15,7 @@ entry:
 
 define i8 @fshr_i8_3rd_arg_var(i8 %a, i8 %b, i8 %c) {
 ; CHECK-LABEL: 'fshr_i8_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %fshr
 ;
 entry:
@@ -49,7 +49,7 @@ entry:
 
 define i32 @fshr_i32_3rd_arg_var(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: 'fshr_i32_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %fshr
 ;
 entry:
@@ -71,7 +71,7 @@ entry:
 
 define i64 @fshr_i64_3rd_arg_var(i64 %a, i64 %b, i64 %c) {
 ; CHECK-LABEL: 'fshr_i64_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %fshr
 ;
 entry:
@@ -116,7 +116,7 @@ entry:
 
 define <16 x i8> @fshr_v16i8_3rd_arg_var(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
 ; CHECK-LABEL: 'fshr_v16i8_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 118 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshr
 ;
 entry:
@@ -148,7 +148,7 @@ entry:
 
 define <8 x i16> @fshr_v8i16_3rd_arg_var(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
 ; CHECK-LABEL: 'fshr_v8i16_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshr
 ;
 entry:
@@ -180,7 +180,7 @@ entry:
 
 define <4 x i32> @fshr_v4i32_3rd_arg_var(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: 'fshr_v4i32_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshr
 ;
 entry:
@@ -212,7 +212,7 @@ entry:
 
 define <2 x i64> @fshr_v2i64_3rd_arg_var(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
 ; CHECK-LABEL: 'fshr_v2i64_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshr
 ;
 entry:
@@ -224,7 +224,7 @@ declare <2 x i64> @llvm.fshr.v4i64(<2 x i64>, <2 x i64>, <2 x i64>)
 
 define <4 x i30> @fshr_v4i30_3rd_arg_var(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c) {
 ; CHECK-LABEL: 'fshr_v4i30_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %fshr = tail call <4 x i30> @llvm.fshr.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %fshr = tail call <4 x i30> @llvm.fshr.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i30> %fshr
 ;
 entry:
diff --git a/llvm/test/Analysis/CostModel/AArch64/rem.ll b/llvm/test/Analysis/CostModel/AArch64/rem.ll
index 06c05aefedf2be..a169a612ac992d 100644
--- a/llvm/test/Analysis/CostModel/AArch64/rem.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/rem.ll
@@ -63,7 +63,7 @@ define i32 @srem() {
 
 define i32 @urem() {
 ; CHECK-LABEL: 'urem'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %I128 = urem i128 undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = urem i128 undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i64 = urem <2 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i64 = urem <4 x i64> undef, undef
@@ -180,28 +180,28 @@ define i32 @srem_const() {
 define i32 @urem_const() {
 ; CHECK-LABEL: 'urem_const'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %I128 = urem i128 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = urem i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i64 = urem <2 x i64> undef, <i64 6, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i64 = urem <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i64 = urem <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i32 = urem <2 x i32> undef, <i32 4, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i16 = urem <2 x i16> undef, <i16 4, i16 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i16 = urem <4 x i16> undef, <i16 4, i16 5, i16 6, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i8 = urem <2 x i8> undef, <i8 4, i8 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i8 = urem <4 x i8> undef, <i8 4, i8 5, i8 6, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i8 = urem <8 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = urem i64 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V2i64 = urem <2 x i64> undef, <i64 6, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %V4i64 = urem <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 188 for instruction: %V8i64 = urem <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I32 = urem i32 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = urem <2 x i32> undef, <i32 4, i32 5>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = urem <4 x i32> undef, <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = urem <8 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = urem <16 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I16 = urem i16 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = urem <2 x i16> undef, <i16 4, i16 5>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i16 = urem <4 x i16> undef, <i16 4, i16 5, i16 6, i16 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = urem <16 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = urem <32 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I8 = urem i8 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i8 = urem <2 x i8> undef, <i8 4, i8 5>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i8 = urem <4 x i8> undef, <i8 4, i8 5, i8 6, i8 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i8 = urem <8 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = urem <16 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 
@@ -297,28 +297,28 @@ define i32 @srem_uniformconst() {
 define i32 @urem_uniformconst() {
 ; CHECK-LABEL: 'urem_uniformconst'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %I128 = urem i128 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = urem i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i32 = urem <2 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i16 = urem <2 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i16 = urem <4 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i8 = urem <2 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i8 = urem <4 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i8 = urem <8 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = urem i64 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 188 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I32 = urem i32 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = urem <2 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I16 = urem i16 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = urem <2 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i16 = urem <4 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I8 = urem i8 undef, 7
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i8 = urem <2 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i8 = urem <4 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i8 = urem <8 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I128 = urem i128 undef, 7
@@ -412,29 +412,29 @@ define i32 @srem_constpow2() {
 
 define i32 @urem_constpow2() {
 ; CHECK-LABEL: 'urem_constpow2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %I128 = urem i128 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = urem i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i64 = urem <2 x i64> undef, <i64 8, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i64 = urem <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i64 = urem <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i32 = urem <2 x i32> undef, <i32 2, i32 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i16 = urem <2 x i16> undef, <i16 2, i16 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i16 = urem <4 x i16> undef, <i16 2, i16 4, i16 8, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i8 = urem <2 x i8> undef, <i8 2, i8 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i8 = urem <4 x i8> undef, <i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i8 = urem <8 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I128 = urem i128 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 8, i64 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = urem <2 x i32> undef, <i32 2, i32 4>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = urem <2 x i16> undef, <i16 2, i16 4>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = urem <4 x i16> undef, <i16 2, i16 4, i16 8, i16 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = urem <2 x i8> undef, <i8 2, i8 4>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = urem <4 x i8> undef, <i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = urem <8 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I128 = urem i128 undef, 16
@@ -528,29 +528,29 @@ define i32 @srem_uniformconstpow2() {
 
 define i32 @urem_uniformconstpow2() {
 ; CHECK-LABEL: 'urem_uniformconstpow2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %I128 = urem i128 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = urem i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i32 = urem <2 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i16 = urem <2 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i16 = urem <4 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i8 = urem <2 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i8 = urem <4 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i8 = urem <8 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I128 = urem i128 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = urem <2 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = urem <2 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = urem <4 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = urem <2 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = urem <4 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = urem <8 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I128 = urem i128 undef, 16
@@ -645,28 +645,28 @@ define i32 @srem_constnegpow2() {
 define i32 @urem_constnegpow2() {
 ; CHECK-LABEL: 'urem_constnegpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %I128 = urem i128 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = urem i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i64 = urem <2 x i64> undef, <i64 -8, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i64 = urem <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i64 = urem <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i32 = urem <2 x i32> undef, <i32 -2, i32 -4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i16 = urem <2 x i16> undef, <i16 -2, i16 -4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i16 = urem <4 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i8 = urem <2 x i8> undef, <i8 -2, i8 -4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i8 = urem <4 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i8 = urem <8 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = urem i64 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V2i64 = urem <2 x i64> undef, <i64 -8, i64 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %V4i64 = urem <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 188 for instruction: %V8i64 = urem <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I32 = urem i32 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = urem <2 x i32> undef, <i32 -2, i32 -4>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = urem <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = urem <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = urem <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I16 = urem i16 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = urem <2 x i16> undef, <i16 -2, i16 -4>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i16 = urem <4 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = urem <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = urem <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I8 = urem i8 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i8 = urem <2 x i8> undef, <i8 -2, i8 -4>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i8 = urem <4 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i8 = urem <8 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = urem <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I128 = urem i128 undef, -16
@@ -761,28 +761,28 @@ define i32 @srem_uniformconstnegpow2() {
 define i32 @urem_uniformconstnegpow2() {
 ; CHECK-LABEL: 'urem_uniformconstnegpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %I128 = urem i128 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = urem i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i32 = urem <2 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i16 = urem <2 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i16 = urem <4 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i8 = urem <2 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i8 = urem <4 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i8 = urem <8 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = urem i64 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 188 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I32 = urem i32 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = urem <2 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I16 = urem i16 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = urem <2 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i16 = urem <4 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I8 = urem i8 undef, -16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i8 = urem <2 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i8 = urem <4 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i8 = urem <8 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I128 = urem i128 undef, -16
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-div.ll b/llvm/test/Analysis/CostModel/AArch64/sve-div.ll
index 33bc4ebe3c3edf..54dc8daedc1e3e 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-div.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-div.ll
@@ -181,22 +181,22 @@ define void @sdiv_uniformconst() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i64 = sdiv <vscale x 2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NV2i64 = sdiv <vscale x 2 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i64 = sdiv <vscale x 4 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i64 = sdiv <vscale x 8 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i32 = sdiv <vscale x 2 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i32 = sdiv <vscale x 4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NV4i32 = sdiv <vscale x 4 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV8i32 = sdiv <vscale x 8 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV16i32 = sdiv <vscale x 16 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i16 = sdiv <vscale x 2 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i16 = sdiv <vscale x 4 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i16 = sdiv <vscale x 8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NV8i16 = sdiv <vscale x 8 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV16i16 = sdiv <vscale x 16 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %NV32i16 = sdiv <vscale x 32 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i8 = sdiv <vscale x 2 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i8 = sdiv <vscale x 4 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i8 = sdiv <vscale x 8 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV16i8 = sdiv <vscale x 16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NV16i8 = sdiv <vscale x 16 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %NV32i8 = sdiv <vscale x 32 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %NV64i8 = sdiv <vscale x 64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
@@ -242,42 +242,42 @@ define void @sdiv_uniformconst() {
 
 define void @udiv_uniformconst() {
 ; CHECK-LABEL: 'udiv_uniformconst'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = udiv <2 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i32 = udiv <2 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i16 = udiv <2 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i16 = udiv <4 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i8 = udiv <2 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i8 = udiv <4 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i8 = udiv <8 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i64 = udiv <vscale x 2 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i64 = udiv <vscale x 4 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i64 = udiv <vscale x 8 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i32 = udiv <vscale x 2 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i32 = udiv <vscale x 4 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV8i32 = udiv <vscale x 8 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV16i32 = udiv <vscale x 16 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i16 = udiv <vscale x 2 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i16 = udiv <vscale x 4 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i16 = udiv <vscale x 8 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV16i16 = udiv <vscale x 16 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %NV32i16 = udiv <vscale x 32 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i8 = udiv <vscale x 2 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i8 = udiv <vscale x 4 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i8 = udiv <vscale x 8 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV16i8 = udiv <vscale x 16 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %NV32i8 = udiv <vscale x 32 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %NV64i8 = udiv <vscale x 64 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i8 = udiv <4 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i8 = udiv <8 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i64 = udiv <vscale x 2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV4i64 = udiv <vscale x 4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV8i64 = udiv <vscale x 8 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i32 = udiv <vscale x 2 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i32 = udiv <vscale x 4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i32 = udiv <vscale x 8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV16i32 = udiv <vscale x 16 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i16 = udiv <vscale x 2 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i16 = udiv <vscale x 4 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV8i16 = udiv <vscale x 8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV16i16 = udiv <vscale x 16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV32i16 = udiv <vscale x 32 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i8 = udiv <vscale x 2 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i8 = udiv <vscale x 4 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV8i8 = udiv <vscale x 8 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV16i8 = udiv <vscale x 16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV32i8 = udiv <vscale x 32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV64i8 = udiv <vscale x 64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %V2i64 = udiv <2 x i64> undef, splat (i64 7)
@@ -400,42 +400,42 @@ define void @sdiv_uniformconstpow2() {
 
 define void @udiv_uniformconstpow2() {
 ; CHECK-LABEL: 'udiv_uniformconstpow2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = udiv <2 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i16 = udiv <2 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i16 = udiv <4 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i8 = udiv <2 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i8 = udiv <4 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i8 = udiv <8 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i64 = udiv <vscale x 2 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i64 = udiv <vscale x 4 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i64 = udiv <vscale x 8 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i32 = udiv <vscale x 2 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i32 = udiv <vscale x 4 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV8i32 = udiv <vscale x 8 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV16i32 = udiv <vscale x 16 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i16 = udiv <vscale x 2 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i16 = udiv <vscale x 4 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i16 = udiv <vscale x 8 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV16i16 = udiv <vscale x 16 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %NV32i16 = udiv <vscale x 32 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i8 = udiv <vscale x 2 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i8 = udiv <vscale x 4 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i8 = udiv <vscale x 8 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV16i8 = udiv <vscale x 16 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %NV32i8 = udiv <vscale x 32 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %NV64i8 = udiv <vscale x 64 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = udiv <2 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = udiv <4 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = udiv <2 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = udiv <4 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = udiv <8 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NV2i64 = udiv <vscale x 2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i64 = udiv <vscale x 4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV8i64 = udiv <vscale x 8 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NV2i32 = udiv <vscale x 2 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NV4i32 = udiv <vscale x 4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV8i32 = udiv <vscale x 8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV16i32 = udiv <vscale x 16 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NV2i16 = udiv <vscale x 2 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NV4i16 = udiv <vscale x 4 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NV8i16 = udiv <vscale x 8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV16i16 = udiv <vscale x 16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV32i16 = udiv <vscale x 32 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NV2i8 = udiv <vscale x 2 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NV4i8 = udiv <vscale x 4 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NV8i8 = udiv <vscale x 8 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NV16i8 = udiv <vscale x 16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV32i8 = udiv <vscale x 32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV64i8 = udiv <vscale x 64 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %V2i64 = udiv <2 x i64> undef, splat (i64 16)
@@ -497,22 +497,22 @@ define void @sdiv_uniformconstnegpow2() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i64 = sdiv <vscale x 2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NV2i64 = sdiv <vscale x 2 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i64 = sdiv <vscale x 4 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i64 = sdiv <vscale x 8 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i32 = sdiv <vscale x 2 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i32 = sdiv <vscale x 4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NV4i32 = sdiv <vscale x 4 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV8i32 = sdiv <vscale x 8 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV16i32 = sdiv <vscale x 16 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i16 = sdiv <vscale x 2 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i16 = sdiv <vscale x 4 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i16 = sdiv <vscale x 8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NV8i16 = sdiv <vscale x 8 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV16i16 = sdiv <vscale x 16 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %NV32i16 = sdiv <vscale x 32 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i8 = sdiv <vscale x 2 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i8 = sdiv <vscale x 4 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i8 = sdiv <vscale x 8 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV16i8 = sdiv <vscale x 16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NV16i8 = sdiv <vscale x 16 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %NV32i8 = sdiv <vscale x 32 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %NV64i8 = sdiv <vscale x 64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
@@ -558,42 +558,42 @@ define void @sdiv_uniformconstnegpow2() {
 
 define void @udiv_uniformconstnegpow2() {
 ; CHECK-LABEL: 'udiv_uniformconstnegpow2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = udiv <2 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i32 = udiv <2 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i16 = udiv <2 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i16 = udiv <4 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i8 = udiv <2 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i8 = udiv <4 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i8 = udiv <8 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i64 = udiv <vscale x 2 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i64 = udiv <vscale x 4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i64 = udiv <vscale x 8 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i32 = udiv <vscale x 2 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i32 = udiv <vscale x 4 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV8i32 = udiv <vscale x 8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV16i32 = udiv <vscale x 16 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i16 = udiv <vscale x 2 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i16 = udiv <vscale x 4 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i16 = udiv <vscale x 8 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV16i16 = udiv <vscale x 16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %NV32i16 = udiv <vscale x 32 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV2i8 = udiv <vscale x 2 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i8 = udiv <vscale x 4 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i8 = udiv <vscale x 8 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV16i8 = udiv <vscale x 16 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %NV32i8 = udiv <vscale x 32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %NV64i8 = udiv <vscale x 64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i8 = udiv <4 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i8 = udiv <8 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i64 = udiv <vscale x 2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV4i64 = udiv <vscale x 4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV8i64 = udiv <vscale x 8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i32 = udiv <vscale x 2 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i32 = udiv <vscale x 4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i32 = udiv <vscale x 8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV16i32 = udiv <vscale x 16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i16 = udiv <vscale x 2 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i16 = udiv <vscale x 4 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV8i16 = udiv <vscale x 8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV16i16 = udiv <vscale x 16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV32i16 = udiv <vscale x 32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i8 = udiv <vscale x 2 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i8 = udiv <vscale x 4 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV8i8 = udiv <vscale x 8 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV16i8 = udiv <vscale x 16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV32i8 = udiv <vscale x 32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV64i8 = udiv <vscale x 64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %V2i64 = udiv <2 x i64> undef, splat (i64 -16)
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
index 78a0ccfde88c06..b7ae41292ea00f 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -1011,17 +1011,17 @@ define void @get_lane_mask() #0 {
 
 define void @fshr() #0 {
 ; CHECK-VSCALE-1-LABEL: 'fshr'
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-VSCALE-2-LABEL: 'fshr'
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; TYPE_BASED_ONLY-LABEL: 'fshr'
@@ -1040,17 +1040,17 @@ define void @fshr() #0 {
 
 define void @fshl() #0 {
 ; CHECK-VSCALE-1-LABEL: 'fshl'
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-VSCALE-2-LABEL: 'fshl'
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; TYPE_BASED_ONLY-LABEL: 'fshl'
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-rem.ll b/llvm/test/Analysis/CostModel/AArch64/sve-rem.ll
index c1591fe5fde331..918307fc218ed9 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-rem.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-rem.ll
@@ -181,22 +181,22 @@ define void @srem_uniformconst() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i64 = srem <vscale x 2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %NV2i64 = srem <vscale x 2 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV4i64 = srem <vscale x 4 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV8i64 = srem <vscale x 8 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i32 = srem <vscale x 2 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i32 = srem <vscale x 4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %NV4i32 = srem <vscale x 4 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i32 = srem <vscale x 8 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV16i32 = srem <vscale x 16 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i16 = srem <vscale x 2 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i16 = srem <vscale x 4 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV8i16 = srem <vscale x 8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %NV8i16 = srem <vscale x 8 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV16i16 = srem <vscale x 16 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %NV32i16 = srem <vscale x 32 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i8 = srem <vscale x 2 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i8 = srem <vscale x 4 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV8i8 = srem <vscale x 8 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NV16i8 = srem <vscale x 16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %NV16i8 = srem <vscale x 16 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %NV32i8 = srem <vscale x 32 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %NV64i8 = srem <vscale x 64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
@@ -242,42 +242,42 @@ define void @srem_uniformconst() {
 
 define void @urem_uniformconst() {
 ; CHECK-LABEL: 'urem_uniformconst'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = urem <2 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = urem <2 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = urem <2 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i16 = urem <4 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i8 = urem <2 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4i8 = urem <4 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i8 = urem <8 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i64 = urem <vscale x 2 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV4i64 = urem <vscale x 4 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV8i64 = urem <vscale x 8 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i32 = urem <vscale x 2 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i32 = urem <vscale x 4 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i32 = urem <vscale x 8 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV16i32 = urem <vscale x 16 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i16 = urem <vscale x 2 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i16 = urem <vscale x 4 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV8i16 = urem <vscale x 8 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV16i16 = urem <vscale x 16 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %NV32i16 = urem <vscale x 32 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i8 = urem <vscale x 2 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i8 = urem <vscale x 4 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV8i8 = urem <vscale x 8 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NV16i8 = urem <vscale x 16 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %NV32i8 = urem <vscale x 32 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %NV64i8 = urem <vscale x 64 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i8 = urem <4 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i8 = urem <8 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NV2i64 = urem <vscale x 2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %NV4i64 = urem <vscale x 4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %NV8i64 = urem <vscale x 8 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NV2i32 = urem <vscale x 2 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NV4i32 = urem <vscale x 4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %NV8i32 = urem <vscale x 8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %NV16i32 = urem <vscale x 16 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NV2i16 = urem <vscale x 2 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NV4i16 = urem <vscale x 4 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NV8i16 = urem <vscale x 8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %NV16i16 = urem <vscale x 16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %NV32i16 = urem <vscale x 32 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NV2i8 = urem <vscale x 2 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NV4i8 = urem <vscale x 4 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NV8i8 = urem <vscale x 8 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NV16i8 = urem <vscale x 16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %NV32i8 = urem <vscale x 32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %NV64i8 = urem <vscale x 64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %V2i64 = urem <2 x i64> undef, splat (i64 7)
@@ -400,42 +400,42 @@ define void @srem_uniformconstpow2() {
 
 define void @urem_uniformconstpow2() {
 ; CHECK-LABEL: 'urem_uniformconstpow2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = urem <2 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = urem <2 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i16 = urem <4 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i8 = urem <2 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4i8 = urem <4 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i8 = urem <8 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i64 = urem <vscale x 2 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV4i64 = urem <vscale x 4 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV8i64 = urem <vscale x 8 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i32 = urem <vscale x 2 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i32 = urem <vscale x 4 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i32 = urem <vscale x 8 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV16i32 = urem <vscale x 16 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i16 = urem <vscale x 2 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i16 = urem <vscale x 4 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV8i16 = urem <vscale x 8 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV16i16 = urem <vscale x 16 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %NV32i16 = urem <vscale x 32 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i8 = urem <vscale x 2 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i8 = urem <vscale x 4 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV8i8 = urem <vscale x 8 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NV16i8 = urem <vscale x 16 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %NV32i8 = urem <vscale x 32 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %NV64i8 = urem <vscale x 64 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = urem <2 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = urem <2 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = urem <4 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = urem <2 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = urem <4 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = urem <8 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NV2i64 = urem <vscale x 2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV4i64 = urem <vscale x 4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV8i64 = urem <vscale x 8 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NV2i32 = urem <vscale x 2 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NV4i32 = urem <vscale x 4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV8i32 = urem <vscale x 8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV16i32 = urem <vscale x 16 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NV2i16 = urem <vscale x 2 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NV4i16 = urem <vscale x 4 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NV8i16 = urem <vscale x 8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV16i16 = urem <vscale x 16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV32i16 = urem <vscale x 32 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NV2i8 = urem <vscale x 2 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NV4i8 = urem <vscale x 4 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NV8i8 = urem <vscale x 8 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NV16i8 = urem <vscale x 16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NV32i8 = urem <vscale x 32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV64i8 = urem <vscale x 64 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %V2i64 = urem <2 x i64> undef, splat (i64 16)
@@ -497,22 +497,22 @@ define void @srem_uniformconstnegpow2() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i64 = srem <vscale x 2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %NV2i64 = srem <vscale x 2 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV4i64 = srem <vscale x 4 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV8i64 = srem <vscale x 8 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i32 = srem <vscale x 2 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i32 = srem <vscale x 4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %NV4i32 = srem <vscale x 4 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i32 = srem <vscale x 8 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV16i32 = srem <vscale x 16 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i16 = srem <vscale x 2 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i16 = srem <vscale x 4 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV8i16 = srem <vscale x 8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %NV8i16 = srem <vscale x 8 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV16i16 = srem <vscale x 16 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %NV32i16 = srem <vscale x 32 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i8 = srem <vscale x 2 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i8 = srem <vscale x 4 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV8i8 = srem <vscale x 8 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NV16i8 = srem <vscale x 16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %NV16i8 = srem <vscale x 16 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %NV32i8 = srem <vscale x 32 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %NV64i8 = srem <vscale x 64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
@@ -558,42 +558,42 @@ define void @srem_uniformconstnegpow2() {
 
 define void @urem_uniformconstnegpow2() {
 ; CHECK-LABEL: 'urem_uniformconstnegpow2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = urem <2 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = urem <2 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = urem <2 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i16 = urem <4 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i8 = urem <2 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4i8 = urem <4 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i8 = urem <8 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i64 = urem <vscale x 2 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV4i64 = urem <vscale x 4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV8i64 = urem <vscale x 8 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i32 = urem <vscale x 2 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i32 = urem <vscale x 4 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NV8i32 = urem <vscale x 8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NV16i32 = urem <vscale x 16 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i16 = urem <vscale x 2 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i16 = urem <vscale x 4 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV8i16 = urem <vscale x 8 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NV16i16 = urem <vscale x 16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %NV32i16 = urem <vscale x 32 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV2i8 = urem <vscale x 2 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NV4i8 = urem <vscale x 4 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NV8i8 = urem <vscale x 8 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NV16i8 = urem <vscale x 16 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %NV32i8 = urem <vscale x 32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %NV64i8 = urem <vscale x 64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i8 = urem <4 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i8 = urem <8 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NV2i64 = urem <vscale x 2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %NV4i64 = urem <vscale x 4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %NV8i64 = urem <vscale x 8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NV2i32 = urem <vscale x 2 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NV4i32 = urem <vscale x 4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %NV8i32 = urem <vscale x 8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %NV16i32 = urem <vscale x 16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NV2i16 = urem <vscale x 2 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NV4i16 = urem <vscale x 4 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NV8i16 = urem <vscale x 8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %NV16i16 = urem <vscale x 16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %NV32i16 = urem <vscale x 32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NV2i8 = urem <vscale x 2 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NV4i8 = urem <vscale x 4 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NV8i8 = urem <vscale x 8 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NV16i8 = urem <vscale x 16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %NV32i8 = urem <vscale x 32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %NV64i8 = urem <vscale x 64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %V2i64 = urem <2 x i64> undef, splat (i64 -16)



More information about the llvm-commits mailing list