[llvm] f2675ab - [ARM][CostModel] Implement getCFInstrCost

Wed Aug 5 04:45:10 PDT 2020

Author: Sam Parker
Date: 2020-08-05T12:44:51+01:00
New Revision: f2675ab45fbb41bb7c1e1b0b86533fc83e877b6b

URL: https://github.com/llvm/llvm-project/commit/f2675ab45fbb41bb7c1e1b0b86533fc83e877b6b
DIFF: https://github.com/llvm/llvm-project/commit/f2675ab45fbb41bb7c1e1b0b86533fc83e877b6b.diff

LOG: [ARM][CostModel] Implement getCFInstrCost

As with other targets, set the throughput cost of control-flow
instructions to free so that we don't miss out of vectorization
opportunities.

Differential Revision: https://reviews.llvm.org/D85283

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
    llvm/lib/Target/ARM/ARMTargetTransformInfo.h
    llvm/test/Analysis/CostModel/ARM/arith-overflow.ll
    llvm/test/Analysis/CostModel/ARM/arith-ssat.ll
    llvm/test/Analysis/CostModel/ARM/arith-usat.ll
    llvm/test/Analysis/CostModel/ARM/arith.ll
    llvm/test/Analysis/CostModel/ARM/cast.ll
    llvm/test/Analysis/CostModel/ARM/cast_ldst.ll
    llvm/test/Analysis/CostModel/ARM/cmps.ll
    llvm/test/Analysis/CostModel/ARM/control-flow.ll
    llvm/test/Analysis/CostModel/ARM/divrem.ll
    llvm/test/Analysis/CostModel/ARM/fparith.ll
    llvm/test/Analysis/CostModel/ARM/gep.ll
    llvm/test/Analysis/CostModel/ARM/load_store.ll
    llvm/test/Analysis/CostModel/ARM/memcpy.ll
    llvm/test/Analysis/CostModel/ARM/mul-cast-vect.ll
    llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll
    llvm/test/Analysis/CostModel/ARM/reduce-add.ll
    llvm/test/Analysis/CostModel/ARM/reduce-smax.ll
    llvm/test/Analysis/CostModel/ARM/reduce-smin.ll
    llvm/test/Analysis/CostModel/ARM/reduce-umax.ll
    llvm/test/Analysis/CostModel/ARM/reduce-umin.ll
    llvm/test/Analysis/CostModel/ARM/select.ll
    llvm/test/Analysis/CostModel/ARM/shl-cast-vect.ll
    llvm/test/Analysis/CostModel/ARM/shuffle.ll
    llvm/test/Analysis/CostModel/ARM/sub-cast-vect.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 68767398191a..6160bc875a07 100644

--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -300,6 +300,18 @@ int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Im
   return getIntImmCost(Imm, Ty, CostKind);
 }
 
+int ARMTTIImpl::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
+  if (CostKind == TTI::TCK_RecipThroughput &&
+      (ST->hasNEON() || ST->hasMVEIntegerOps())) {
+    // FIXME: The vectorizer is highly sensistive to the cost of these
+    // instructions, which suggests that it may be using the costs incorrectly.
+    // But, for now, just make them free to avoid performance regressions for
+    // vector targets.
+    return 0;
+  }
+  return BaseT::getCFInstrCost(Opcode, CostKind);
+}
+
 int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                  TTI::CastContextHint CCH,
                                  TTI::TargetCostKind CostKind,

diff  --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index ac7d0378d90b..91f6d97accc6 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -209,6 +209,9 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
     }
   }
 
+  int getCFInstrCost(unsigned Opcode,
+                     TTI::TargetCostKind CostKind);
+
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                        TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
                        const Instruction *I = nullptr);

diff  --git a/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll b/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll
index 0a681a7c2a8f..050f2a790533 100644
--- a/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll
+++ b/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll
@@ -63,7 +63,7 @@ define i32 @sadd(i32 %arg) {
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-RECIP-LABEL: 'sadd'
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
@@ -82,7 +82,7 @@ define i32 @sadd(i32 %arg) {
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; V8M-SIZE-LABEL: 'sadd'
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
@@ -221,7 +221,7 @@ define i32 @uadd(i32 %arg) {
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-RECIP-LABEL: 'uadd'
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
@@ -240,7 +240,7 @@ define i32 @uadd(i32 %arg) {
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; V8M-SIZE-LABEL: 'uadd'
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
@@ -379,7 +379,7 @@ define i32 @ssub(i32 %arg) {
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-RECIP-LABEL: 'ssub'
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
@@ -398,7 +398,7 @@ define i32 @ssub(i32 %arg) {
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; V8M-SIZE-LABEL: 'ssub'
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
@@ -537,7 +537,7 @@ define i32 @usub(i32 %arg) {
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-RECIP-LABEL: 'usub'
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
@@ -556,7 +556,7 @@ define i32 @usub(i32 %arg) {
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; V8M-SIZE-LABEL: 'usub'
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
@@ -695,7 +695,7 @@ define i32 @smul(i32 %arg) {
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-RECIP-LABEL: 'smul'
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
@@ -714,7 +714,7 @@ define i32 @smul(i32 %arg) {
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 116 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 424 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; V8M-SIZE-LABEL: 'smul'
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
@@ -853,7 +853,7 @@ define i32 @umul(i32 %arg) {
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-RECIP-LABEL: 'umul'
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
@@ -872,7 +872,7 @@ define i32 @umul(i32 %arg) {
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 416 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; V8M-SIZE-LABEL: 'umul'
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)

diff  --git a/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll b/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll
index df5535ded070..d5afce84b136 100644
--- a/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll
+++ b/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll
@@ -63,7 +63,7 @@ define i32 @add(i32 %arg) {
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-RECIP-LABEL: 'add'
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
@@ -82,7 +82,7 @@ define i32 @add(i32 %arg) {
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; V8M-SIZE-LABEL: 'add'
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
@@ -221,7 +221,7 @@ define i32 @sub(i32 %arg) {
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-RECIP-LABEL: 'sub'
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
@@ -240,7 +240,7 @@ define i32 @sub(i32 %arg) {
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; V8M-SIZE-LABEL: 'sub'
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)

diff  --git a/llvm/test/Analysis/CostModel/ARM/arith-usat.ll b/llvm/test/Analysis/CostModel/ARM/arith-usat.ll
index 5377e0dde8c4..1059c2ee551c 100644
--- a/llvm/test/Analysis/CostModel/ARM/arith-usat.ll
+++ b/llvm/test/Analysis/CostModel/ARM/arith-usat.ll
@@ -63,7 +63,7 @@ define i32 @add(i32 %arg) {
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-RECIP-LABEL: 'add'
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef)
@@ -82,7 +82,7 @@ define i32 @add(i32 %arg) {
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; V8M-SIZE-LABEL: 'add'
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef)
@@ -221,7 +221,7 @@ define i32 @sub(i32 %arg) {
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-RECIP-LABEL: 'sub'
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
@@ -240,7 +240,7 @@ define i32 @sub(i32 %arg) {
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; V8M-SIZE-LABEL: 'sub'
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)

diff  --git a/llvm/test/Analysis/CostModel/ARM/arith.ll b/llvm/test/Analysis/CostModel/ARM/arith.ll
index 451c5f50b713..55b60fb9c287 100644
--- a/llvm/test/Analysis/CostModel/ARM/arith.ll
+++ b/llvm/test/Analysis/CostModel/ARM/arith.ll
@@ -17,6 +17,7 @@ define void @i8() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i = and i8 undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j = or i8 undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k = xor i8 undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'i8'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = add i8 undef, undef
@@ -28,6 +29,7 @@ define void @i8() {
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i = and i8 undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j = or i8 undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k = xor i8 undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'i8'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = add i8 undef, undef
@@ -39,6 +41,7 @@ define void @i8() {
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i = and i8 undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j = or i8 undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k = xor i8 undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'i8'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = add i8 undef, undef
@@ -50,6 +53,7 @@ define void @i8() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i = and i8 undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j = or i8 undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k = xor i8 undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %c = add i8 undef, undef
   %d = sub i8 undef, undef
@@ -74,6 +78,7 @@ define void @i16() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i = and i16 undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j = or i16 undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k = xor i16 undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'i16'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = add i16 undef, undef
@@ -85,6 +90,7 @@ define void @i16() {
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i = and i16 undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j = or i16 undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k = xor i16 undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'i16'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = add i16 undef, undef
@@ -96,6 +102,7 @@ define void @i16() {
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i = and i16 undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j = or i16 undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k = xor i16 undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'i16'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = add i16 undef, undef
@@ -107,6 +114,7 @@ define void @i16() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i = and i16 undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j = or i16 undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k = xor i16 undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %c = add i16 undef, undef
   %d = sub i16 undef, undef
@@ -131,6 +139,7 @@ define void @i32() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i = and i32 undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j = or i32 undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k = xor i32 undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'i32'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = add i32 undef, undef
@@ -142,6 +151,7 @@ define void @i32() {
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i = and i32 undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j = or i32 undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k = xor i32 undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'i32'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = add i32 undef, undef
@@ -153,6 +163,7 @@ define void @i32() {
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i = and i32 undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j = or i32 undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k = xor i32 undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'i32'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = add i32 undef, undef
@@ -164,6 +175,7 @@ define void @i32() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i = and i32 undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j = or i32 undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k = xor i32 undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %c = add i32 undef, undef
   %d = sub i32 undef, undef
@@ -188,6 +200,7 @@ define void @i64() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i = and i64 undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %j = or i64 undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %k = xor i64 undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'i64'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %c = add i64 undef, undef
@@ -199,6 +212,7 @@ define void @i64() {
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i = and i64 undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %j = or i64 undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %k = xor i64 undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'i64'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %c = add i64 undef, undef
@@ -210,6 +224,7 @@ define void @i64() {
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i = and i64 undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %j = or i64 undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %k = xor i64 undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'i64'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %c = add i64 undef, undef
@@ -221,6 +236,7 @@ define void @i64() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i = and i64 undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %j = or i64 undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %k = xor i64 undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %c = add i64 undef, undef
   %d = sub i64 undef, undef
@@ -273,6 +289,7 @@ define void @vi8() {
 ; CHECK-MVE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = and <16 x i8> undef, undef
 ; CHECK-MVE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j16 = or <16 x i8> undef, undef
 ; CHECK-MVE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k16 = xor <16 x i8> undef, undef
+; CHECK-MVE1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE2-LABEL: 'vi8'
 ; CHECK-MVE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %c2 = add <2 x i8> undef, undef
@@ -311,6 +328,7 @@ define void @vi8() {
 ; CHECK-MVE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i16 = and <16 x i8> undef, undef
 ; CHECK-MVE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %j16 = or <16 x i8> undef, undef
 ; CHECK-MVE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %k16 = xor <16 x i8> undef, undef
+; CHECK-MVE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE4-LABEL: 'vi8'
 ; CHECK-MVE4-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i8> undef, undef
@@ -349,6 +367,7 @@ define void @vi8() {
 ; CHECK-MVE4-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %i16 = and <16 x i8> undef, undef
 ; CHECK-MVE4-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %j16 = or <16 x i8> undef, undef
 ; CHECK-MVE4-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %k16 = xor <16 x i8> undef, undef
+; CHECK-MVE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi8'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %c2 = add <2 x i8> undef, undef
@@ -387,6 +406,7 @@ define void @vi8() {
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %i16 = and <16 x i8> undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %j16 = or <16 x i8> undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %k16 = xor <16 x i8> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi8'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %c2 = add <2 x i8> undef, undef
@@ -425,6 +445,7 @@ define void @vi8() {
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %i16 = and <16 x i8> undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %j16 = or <16 x i8> undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %k16 = xor <16 x i8> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi8'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c2 = add <2 x i8> undef, undef
@@ -463,6 +484,7 @@ define void @vi8() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = and <16 x i8> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j16 = or <16 x i8> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k16 = xor <16 x i8> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %c2 = add <2 x i8> undef, undef
   %d2 = sub <2 x i8> undef, undef
@@ -541,6 +563,7 @@ define void @vi16() {
 ; CHECK-MVE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i16 = and <16 x i16> undef, undef
 ; CHECK-MVE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %j16 = or <16 x i16> undef, undef
 ; CHECK-MVE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %k16 = xor <16 x i16> undef, undef
+; CHECK-MVE1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE2-LABEL: 'vi16'
 ; CHECK-MVE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %c2 = add <2 x i16> undef, undef
@@ -579,6 +602,7 @@ define void @vi16() {
 ; CHECK-MVE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %i16 = and <16 x i16> undef, undef
 ; CHECK-MVE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %j16 = or <16 x i16> undef, undef
 ; CHECK-MVE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %k16 = xor <16 x i16> undef, undef
+; CHECK-MVE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE4-LABEL: 'vi16'
 ; CHECK-MVE4-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i16> undef, undef
@@ -617,6 +641,7 @@ define void @vi16() {
 ; CHECK-MVE4-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %i16 = and <16 x i16> undef, undef
 ; CHECK-MVE4-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %j16 = or <16 x i16> undef, undef
 ; CHECK-MVE4-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %k16 = xor <16 x i16> undef, undef
+; CHECK-MVE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi16'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %c2 = add <2 x i16> undef, undef
@@ -655,6 +680,7 @@ define void @vi16() {
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %i16 = and <16 x i16> undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %j16 = or <16 x i16> undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %k16 = xor <16 x i16> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi16'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %c2 = add <2 x i16> undef, undef
@@ -693,6 +719,7 @@ define void @vi16() {
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %i16 = and <16 x i16> undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %j16 = or <16 x i16> undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %k16 = xor <16 x i16> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi16'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c2 = add <2 x i16> undef, undef
@@ -731,6 +758,7 @@ define void @vi16() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i16 = and <16 x i16> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %j16 = or <16 x i16> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %k16 = xor <16 x i16> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %c2 = add <2 x i16> undef, undef
   %d2 = sub <2 x i16> undef, undef
@@ -809,6 +837,7 @@ define void @vi32() {
 ; CHECK-MVE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %i16 = and <16 x i32> undef, undef
 ; CHECK-MVE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %j16 = or <16 x i32> undef, undef
 ; CHECK-MVE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %k16 = xor <16 x i32> undef, undef
+; CHECK-MVE1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE2-LABEL: 'vi32'
 ; CHECK-MVE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %c2 = add <2 x i32> undef, undef
@@ -847,6 +876,7 @@ define void @vi32() {
 ; CHECK-MVE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %i16 = and <16 x i32> undef, undef
 ; CHECK-MVE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %j16 = or <16 x i32> undef, undef
 ; CHECK-MVE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %k16 = xor <16 x i32> undef, undef
+; CHECK-MVE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE4-LABEL: 'vi32'
 ; CHECK-MVE4-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i32> undef, undef
@@ -885,6 +915,7 @@ define void @vi32() {
 ; CHECK-MVE4-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %i16 = and <16 x i32> undef, undef
 ; CHECK-MVE4-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %j16 = or <16 x i32> undef, undef
 ; CHECK-MVE4-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %k16 = xor <16 x i32> undef, undef
+; CHECK-MVE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi32'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %c2 = add <2 x i32> undef, undef
@@ -923,6 +954,7 @@ define void @vi32() {
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %i16 = and <16 x i32> undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %j16 = or <16 x i32> undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %k16 = xor <16 x i32> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi32'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %c2 = add <2 x i32> undef, undef
@@ -961,6 +993,7 @@ define void @vi32() {
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %i16 = and <16 x i32> undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %j16 = or <16 x i32> undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %k16 = xor <16 x i32> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi32'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c2 = add <2 x i32> undef, undef
@@ -999,6 +1032,7 @@ define void @vi32() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %i16 = and <16 x i32> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %j16 = or <16 x i32> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %k16 = xor <16 x i32> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %c2 = add <2 x i32> undef, undef
   %d2 = sub <2 x i32> undef, undef
@@ -1077,6 +1111,7 @@ define void @vi64() {
 ; CHECK-MVE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %i16 = and <16 x i64> undef, undef
 ; CHECK-MVE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %j16 = or <16 x i64> undef, undef
 ; CHECK-MVE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %k16 = xor <16 x i64> undef, undef
+; CHECK-MVE1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE2-LABEL: 'vi64'
 ; CHECK-MVE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %c2 = add <2 x i64> undef, undef
@@ -1115,6 +1150,7 @@ define void @vi64() {
 ; CHECK-MVE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %i16 = and <16 x i64> undef, undef
 ; CHECK-MVE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %j16 = or <16 x i64> undef, undef
 ; CHECK-MVE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %k16 = xor <16 x i64> undef, undef
+; CHECK-MVE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE4-LABEL: 'vi64'
 ; CHECK-MVE4-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %c2 = add <2 x i64> undef, undef
@@ -1153,6 +1189,7 @@ define void @vi64() {
 ; CHECK-MVE4-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %i16 = and <16 x i64> undef, undef
 ; CHECK-MVE4-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %j16 = or <16 x i64> undef, undef
 ; CHECK-MVE4-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %k16 = xor <16 x i64> undef, undef
+; CHECK-MVE4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi64'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %c2 = add <2 x i64> undef, undef
@@ -1191,6 +1228,7 @@ define void @vi64() {
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %i16 = and <16 x i64> undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %j16 = or <16 x i64> undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %k16 = xor <16 x i64> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi64'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %c2 = add <2 x i64> undef, undef
@@ -1229,6 +1267,7 @@ define void @vi64() {
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %i16 = and <16 x i64> undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %j16 = or <16 x i64> undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %k16 = xor <16 x i64> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi64'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c2 = add <2 x i64> undef, undef
@@ -1267,6 +1306,7 @@ define void @vi64() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %i16 = and <16 x i64> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %j16 = or <16 x i64> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %k16 = xor <16 x i64> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %c2 = add <2 x i64> undef, undef
   %d2 = sub <2 x i64> undef, undef

diff  --git a/llvm/test/Analysis/CostModel/ARM/cast.ll b/llvm/test/Analysis/CostModel/ARM/cast.ll
index 28f7c6cfcf36..b539dae1585e 100644
--- a/llvm/test/Analysis/CostModel/ARM/cast.ll
+++ b/llvm/test/Analysis/CostModel/ARM/cast.ll
@@ -396,7 +396,7 @@ define i32 @casts() {
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double>
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double>
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double>
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-RECIP-LABEL: 'casts'
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8
@@ -783,7 +783,7 @@ define i32 @casts() {
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2088 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double>
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2048 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double>
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2048 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double>
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-MAIN-RECIP-LABEL: 'casts'
 ; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8
@@ -1944,7 +1944,7 @@ define i32 @casts() {
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double>
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double>
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double>
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-SIZE-LABEL: 'casts'
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8
@@ -3935,7 +3935,7 @@ define i32 @bitcasts() {
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-RECIP-LABEL: 'bitcasts'
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32
@@ -3946,7 +3946,7 @@ define i32 @bitcasts() {
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-MAIN-RECIP-LABEL: 'bitcasts'
 ; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32
@@ -3979,7 +3979,7 @@ define i32 @bitcasts() {
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-SIZE-LABEL: 'bitcasts'
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32

diff  --git a/llvm/test/Analysis/CostModel/ARM/cast_ldst.ll b/llvm/test/Analysis/CostModel/ARM/cast_ldst.ll
index 54029289661c..839d3e47dd98 100644
--- a/llvm/test/Analysis/CostModel/ARM/cast_ldst.ll
+++ b/llvm/test/Analysis/CostModel/ARM/cast_ldst.ll
@@ -75,7 +75,7 @@ define i32 @load_extends() {
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-RECIP-LABEL: 'load_extends'
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1
@@ -142,7 +142,7 @@ define i32 @load_extends() {
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-MAIN-RECIP-LABEL: 'load_extends'
 ; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1
@@ -343,7 +343,7 @@ define i32 @load_extends() {
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-SIZE-LABEL: 'load_extends'
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1
@@ -739,7 +739,7 @@ define i32 @store_trunc() {
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 16
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 16
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-RECIP-LABEL: 'store_trunc'
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i816 = trunc i16 undef to i8
@@ -794,7 +794,7 @@ define i32 @store_trunc() {
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 16
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 16
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-MAIN-RECIP-LABEL: 'store_trunc'
 ; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i816 = trunc i16 undef to i8
@@ -959,7 +959,7 @@ define i32 @store_trunc() {
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 16
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 16
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-SIZE-LABEL: 'store_trunc'
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i816 = trunc i16 undef to i8
@@ -1268,7 +1268,7 @@ define i32 @load_fpextends() {
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-RECIP-LABEL: 'load_fpextends'
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2
@@ -1296,7 +1296,7 @@ define i32 @load_fpextends() {
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-MAIN-RECIP-LABEL: 'load_fpextends'
 ; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2
@@ -1380,7 +1380,7 @@ define i32 @load_fpextends() {
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-SIZE-LABEL: 'load_fpextends'
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2
@@ -1550,7 +1550,7 @@ define i32 @load_fptrunc() {
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 16
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 16
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-RECIP-LABEL: 'load_fptrunc'
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i1632 = fptrunc float undef to half
@@ -1575,7 +1575,7 @@ define i32 @load_fptrunc() {
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 16
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 16
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-MAIN-RECIP-LABEL: 'load_fptrunc'
 ; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %i1632 = fptrunc float undef to half
@@ -1650,7 +1650,7 @@ define i32 @load_fptrunc() {
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 16
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 16
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-SIZE-LABEL: 'load_fptrunc'
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i1632 = fptrunc float undef to half
@@ -1832,7 +1832,7 @@ define i32 @maskedload_extends() {
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-RECIP-LABEL: 'maskedload_extends'
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)
@@ -1884,7 +1884,7 @@ define i32 @maskedload_extends() {
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-MAIN-RECIP-LABEL: 'maskedload_extends'
 ; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)
@@ -2040,7 +2040,7 @@ define i32 @maskedload_extends() {
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-SIZE-LABEL: 'maskedload_extends'
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)
@@ -2348,7 +2348,7 @@ define i32 @maskedstore_trunc() {
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef)
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef)
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef)
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-RECIP-LABEL: 'maskedstore_trunc'
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2816 = trunc <2 x i16> undef to <2 x i8>
@@ -2391,7 +2391,7 @@ define i32 @maskedstore_trunc() {
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef)
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef)
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef)
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-MAIN-RECIP-LABEL: 'maskedstore_trunc'
 ; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2816 = trunc <2 x i16> undef to <2 x i8>
@@ -2520,7 +2520,7 @@ define i32 @maskedstore_trunc() {
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef)
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef)
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef)
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-SIZE-LABEL: 'maskedstore_trunc'
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2816 = trunc <2 x i16> undef to <2 x i8>
@@ -2763,7 +2763,7 @@ define i32 @maskedload_fpextends() {
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-RECIP-LABEL: 'maskedload_fpextends'
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)
@@ -2786,7 +2786,7 @@ define i32 @maskedload_fpextends() {
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-MAIN-RECIP-LABEL: 'maskedload_fpextends'
 ; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)
@@ -2855,7 +2855,7 @@ define i32 @maskedload_fpextends() {
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-SIZE-LABEL: 'maskedload_fpextends'
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)
@@ -2993,7 +2993,7 @@ define i32 @maskedload_fptrunc() {
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-RECIP-LABEL: 'maskedload_fptrunc'
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v21632 = fptrunc <2 x float> undef to <2 x half>
@@ -3012,7 +3012,7 @@ define i32 @maskedload_fptrunc() {
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-MAIN-RECIP-LABEL: 'maskedload_fptrunc'
 ; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21632 = fptrunc <2 x float> undef to <2 x half>
@@ -3069,7 +3069,7 @@ define i32 @maskedload_fptrunc() {
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-SIZE-LABEL: 'maskedload_fptrunc'
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v21632 = fptrunc <2 x float> undef to <2 x half>

diff  --git a/llvm/test/Analysis/CostModel/ARM/cmps.ll b/llvm/test/Analysis/CostModel/ARM/cmps.ll
index 95a01bc7af4f..e2e7c0492c31 100644
--- a/llvm/test/Analysis/CostModel/ARM/cmps.ll
+++ b/llvm/test/Analysis/CostModel/ARM/cmps.ll
@@ -23,6 +23,7 @@ define i32 @cmps() {
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a10 = fcmp olt <8 x half> undef, undef
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a11 = fcmp oge <4 x float> undef, undef
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %a12 = fcmp oge <2 x double> undef, undef
+; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-MAIN-RECIP-LABEL: 'cmps'
 ; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef
@@ -38,6 +39,7 @@ define i32 @cmps() {
 ; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %a10 = fcmp olt <8 x half> undef, undef
 ; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a11 = fcmp oge <4 x float> undef, undef
 ; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %a12 = fcmp oge <2 x double> undef, undef
+; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-BASE-RECIP-LABEL: 'cmps'
 ; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef
@@ -53,6 +55,7 @@ define i32 @cmps() {
 ; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %a10 = fcmp olt <8 x half> undef, undef
 ; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a11 = fcmp oge <4 x float> undef, undef
 ; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %a12 = fcmp oge <2 x double> undef, undef
+; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; CHECK-V8R-RECIP-LABEL: 'cmps'
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef
@@ -68,6 +71,7 @@ define i32 @cmps() {
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %a10 = fcmp olt <8 x half> undef, undef
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef
 ; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a12 = fcmp oge <2 x double> undef, undef
+; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-SIZE-LABEL: 'cmps'
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef
@@ -83,6 +87,7 @@ define i32 @cmps() {
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a10 = fcmp olt <8 x half> undef, undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a12 = fcmp oge <2 x double> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-MAIN-SIZE-LABEL: 'cmps'
 ; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef
@@ -98,6 +103,7 @@ define i32 @cmps() {
 ; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a10 = fcmp olt <8 x half> undef, undef
 ; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef
 ; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a12 = fcmp oge <2 x double> undef, undef
+; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-BASE-SIZE-LABEL: 'cmps'
 ; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef
@@ -113,6 +119,7 @@ define i32 @cmps() {
 ; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a10 = fcmp olt <8 x half> undef, undef
 ; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef
 ; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a12 = fcmp oge <2 x double> undef, undef
+; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; CHECK-V8R-SIZE-LABEL: 'cmps'
 ; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef
@@ -128,6 +135,7 @@ define i32 @cmps() {
 ; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a10 = fcmp olt <8 x half> undef, undef
 ; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef
 ; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a12 = fcmp oge <2 x double> undef, undef
+; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %a = icmp slt i8 undef, undef
   %b = icmp ult i16 undef, undef

diff  --git a/llvm/test/Analysis/CostModel/ARM/control-flow.ll b/llvm/test/Analysis/CostModel/ARM/control-flow.ll
index f4c55a9b62e0..d2d649d4df49 100644
--- a/llvm/test/Analysis/CostModel/ARM/control-flow.ll
+++ b/llvm/test/Analysis/CostModel/ARM/control-flow.ll
@@ -1,12 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s -cost-model -analyze -cost-kind=code-size  -mtriple=thumbv8m.base   | FileCheck %s --check-prefix=CHECK-T1-SIZE
 ; RUN: opt < %s -cost-model -analyze -cost-kind=code-size  -mtriple=thumbv8m.main   | FileCheck %s --check-prefix=CHECK-V8M-SIZE
+; RUN: opt < %s -cost-model -analyze -cost-kind=code-size  -mtriple=thumbv8.1m.main -mattr=+mve  | FileCheck %s --check-prefix=CHECK-MVE-SIZE
+; RUN: opt < %s -cost-model -analyze -cost-kind=code-size  -mtriple=armv8a -mattr=+neon  | FileCheck %s --check-prefix=CHECK-NEON-SIZE
 ; RUN: opt < %s -cost-model -analyze -cost-kind=latency    -mtriple=thumbv8m.base   | FileCheck %s --check-prefix=CHECK-T1-LATENCY
 ; RUN: opt < %s -cost-model -analyze -cost-kind=latency    -mtriple=thumbv8m.main   | FileCheck %s --check-prefix=CHECK-V8M-LATENCY
 ; RUN: opt < %s -cost-model -analyze -cost-kind=latency    -mtriple=thumbv8.1m.main | FileCheck %s --check-prefix=CHECK-V8_1M-LATENCY
 ; RUN: opt < %s -cost-model -analyze -cost-kind=throughput -mtriple=thumbv8m.base   | FileCheck %s --check-prefix=CHECK-T1-THROUGHPUT
 ; RUN: opt < %s -cost-model -analyze -cost-kind=throughput -mtriple=thumbv8m.main   | FileCheck %s --check-prefix=CHECK-V8M-THROUGHPUT
 ; RUN: opt < %s -cost-model -analyze -cost-kind=throughput -mtriple=thumbv8.1m.main | FileCheck %s --check-prefix=CHECK-V8_1M-THROUGHPUT
+; RUN: opt < %s -cost-model -analyze -cost-kind=throughput -mtriple=thumbv8.1m.main -mattr=+mve | FileCheck %s --check-prefix=CHECK-MVE-THROUGHPUT
+; RUN: opt < %s -cost-model -analyze -cost-kind=throughput -mtriple=armv8a -mattr=+neon | FileCheck %s --check-prefix=CHECK-NEON-THROUGHPUT
 
 define i32 @simple_loop_cost(i32 %N) {
 ; CHECK-T1-SIZE-LABEL: 'simple_loop_cost'
@@ -31,6 +35,28 @@ define i32 @simple_loop_cost(i32 %N) {
 ; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
 ; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
 ;
+; CHECK-MVE-SIZE-LABEL: 'simple_loop_cost'
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
+;
+; CHECK-NEON-SIZE-LABEL: 'simple_loop_cost'
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
+;
 ; CHECK-T1-LATENCY-LABEL: 'simple_loop_cost'
 ; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
 ; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
@@ -97,6 +123,28 @@ define i32 @simple_loop_cost(i32 %N) {
 ; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
 ; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
 ;
+; CHECK-MVE-THROUGHPUT-LABEL: 'simple_loop_cost'
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %loop
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res
+;
+; CHECK-NEON-THROUGHPUT-LABEL: 'simple_loop_cost'
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %loop
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res
+;
 entry:
   %zero = icmp eq i32 %N, 0
   br i1 %zero, label %exit, label %preheader
@@ -122,10 +170,10 @@ define i32 @simple_mul_loop(i32* %A, i32* %B, i32 %N) {
 ; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
 ; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
 ; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a
+; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
 ; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
 ; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b
+; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
 ; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
 ; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
 ; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
@@ -138,26 +186,58 @@ define i32 @simple_mul_loop(i32* %A, i32* %B, i32 %N) {
 ; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
 ; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
 ; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a
+; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
 ; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
 ; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b
+; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
 ; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
 ; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
 ; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
 ; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
 ; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
 ;
+; CHECK-MVE-SIZE-LABEL: 'simple_mul_loop'
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
+;
+; CHECK-NEON-SIZE-LABEL: 'simple_mul_loop'
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
+;
 ; CHECK-T1-LATENCY-LABEL: 'simple_mul_loop'
 ; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
 ; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
 ; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
 ; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
 ; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %load = load i32, i32* %addr.a
+; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %load = load i32, i32* %addr.a, align 4
 ; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
 ; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b
+; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
 ; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
 ; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
 ; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
@@ -170,10 +250,10 @@ define i32 @simple_mul_loop(i32* %A, i32* %B, i32 %N) {
 ; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
 ; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
 ; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %load = load i32, i32* %addr.a
+; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %load = load i32, i32* %addr.a, align 4
 ; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
 ; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b
+; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
 ; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
 ; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
 ; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
@@ -186,10 +266,10 @@ define i32 @simple_mul_loop(i32* %A, i32* %B, i32 %N) {
 ; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
 ; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
 ; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %load = load i32, i32* %addr.a
+; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %load = load i32, i32* %addr.a, align 4
 ; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
 ; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b
+; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
 ; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
 ; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
 ; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
@@ -202,10 +282,10 @@ define i32 @simple_mul_loop(i32* %A, i32* %B, i32 %N) {
 ; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
 ; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
 ; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a
+; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
 ; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
 ; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b
+; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
 ; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
 ; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
 ; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
@@ -218,10 +298,10 @@ define i32 @simple_mul_loop(i32* %A, i32* %B, i32 %N) {
 ; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
 ; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
 ; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a
+; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
 ; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
 ; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b
+; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
 ; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
 ; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
 ; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
@@ -234,16 +314,48 @@ define i32 @simple_mul_loop(i32* %A, i32* %B, i32 %N) {
 ; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
 ; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
 ; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a
+; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
 ; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
 ; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b
+; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
 ; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
 ; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
 ; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
 ; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
 ; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
 ;
+; CHECK-MVE-THROUGHPUT-LABEL: 'simple_mul_loop'
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %loop
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res
+;
+; CHECK-NEON-THROUGHPUT-LABEL: 'simple_mul_loop'
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %loop
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res
+;
 entry:
   %zero = icmp eq i32 %N, 0
   br i1 %zero, label %exit, label %preheader
@@ -275,10 +387,10 @@ define i32 @simple_mul_ext_lsr_loop(i16* %A, i32* %B, i32 %N) {
 ; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
 ; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
 ; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a
+; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2
 ; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
 ; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b
+; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
 ; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
 ; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
 ; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
@@ -294,10 +406,10 @@ define i32 @simple_mul_ext_lsr_loop(i16* %A, i32* %B, i32 %N) {
 ; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
 ; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
 ; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a
+; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2
 ; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
 ; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b
+; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
 ; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
 ; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
 ; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
@@ -306,6 +418,44 @@ define i32 @simple_mul_ext_lsr_loop(i16* %A, i32* %B, i32 %N) {
 ; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
 ; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
 ;
+; CHECK-MVE-SIZE-LABEL: 'simple_mul_ext_lsr_loop'
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
+;
+; CHECK-NEON-SIZE-LABEL: 'simple_mul_ext_lsr_loop'
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
+;
 ; CHECK-T1-LATENCY-LABEL: 'simple_mul_ext_lsr_loop'
 ; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
 ; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
@@ -313,10 +463,10 @@ define i32 @simple_mul_ext_lsr_loop(i16* %A, i32* %B, i32 %N) {
 ; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
 ; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
 ; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %load = load i16, i16* %addr.a
+; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %load = load i16, i16* %addr.a, align 2
 ; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
 ; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b
+; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
 ; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
 ; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
 ; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
@@ -332,10 +482,10 @@ define i32 @simple_mul_ext_lsr_loop(i16* %A, i32* %B, i32 %N) {
 ; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
 ; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
 ; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %load = load i16, i16* %addr.a
+; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %load = load i16, i16* %addr.a, align 2
 ; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
 ; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b
+; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
 ; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
 ; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
 ; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
@@ -351,10 +501,10 @@ define i32 @simple_mul_ext_lsr_loop(i16* %A, i32* %B, i32 %N) {
 ; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
 ; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
 ; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %load = load i16, i16* %addr.a
+; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %load = load i16, i16* %addr.a, align 2
 ; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
 ; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b
+; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
 ; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
 ; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
 ; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
@@ -370,10 +520,10 @@ define i32 @simple_mul_ext_lsr_loop(i16* %A, i32* %B, i32 %N) {
 ; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
 ; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
 ; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a
+; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2
 ; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
 ; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b
+; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
 ; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
 ; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
 ; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
@@ -389,10 +539,10 @@ define i32 @simple_mul_ext_lsr_loop(i16* %A, i32* %B, i32 %N) {
 ; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
 ; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
 ; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a
+; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2
 ; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
 ; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b
+; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
 ; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
 ; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
 ; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
@@ -408,10 +558,10 @@ define i32 @simple_mul_ext_lsr_loop(i16* %A, i32* %B, i32 %N) {
 ; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
 ; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
 ; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a
+; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2
 ; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
 ; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b
+; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
 ; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
 ; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
 ; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
@@ -420,6 +570,44 @@ define i32 @simple_mul_ext_lsr_loop(i16* %A, i32* %B, i32 %N) {
 ; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
 ; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
 ;
+; CHECK-MVE-THROUGHPUT-LABEL: 'simple_mul_ext_lsr_loop'
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %loop
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res
+;
+; CHECK-NEON-THROUGHPUT-LABEL: 'simple_mul_ext_lsr_loop'
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %loop
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res
+;
 entry:
   %zero = icmp eq i32 %N, 0
   br i1 %zero, label %exit, label %preheader

diff  --git a/llvm/test/Analysis/CostModel/ARM/divrem.ll b/llvm/test/Analysis/CostModel/ARM/divrem.ll
index d3ebfb8ff0df..182a86296e71 100644
--- a/llvm/test/Analysis/CostModel/ARM/divrem.ll
+++ b/llvm/test/Analysis/CostModel/ARM/divrem.ll
@@ -15,6 +15,7 @@ define void @i8() {
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = udiv i8 undef, 2
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i8 undef, 2
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i8 undef, 2
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'i8'
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i8 undef, undef
@@ -25,6 +26,7 @@ define void @i8() {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i8 undef, 2
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i8 undef, 2
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i8 undef, 2
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'i8'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i8 undef, undef
@@ -35,6 +37,7 @@ define void @i8() {
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i8 undef, 2
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i8 undef, 2
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i8 undef, 2
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'i8'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i8 undef, undef
@@ -45,6 +48,7 @@ define void @i8() {
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i8 undef, 2
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i8 undef, 2
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i8 undef, 2
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'i8'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i8 undef, undef
@@ -55,6 +59,7 @@ define void @i8() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i8 undef, 2
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i8 undef, 2
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i8 undef, 2
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %1 = sdiv i8 undef, undef
   %2 = udiv i8 undef, undef
@@ -77,6 +82,7 @@ define void @i16() {
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = udiv i16 undef, 2
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i16 undef, 2
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i16 undef, 2
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'i16'
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i16 undef, undef
@@ -87,6 +93,7 @@ define void @i16() {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i16 undef, 2
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i16 undef, 2
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i16 undef, 2
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'i16'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i16 undef, undef
@@ -97,6 +104,7 @@ define void @i16() {
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i16 undef, 2
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i16 undef, 2
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i16 undef, 2
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'i16'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i16 undef, undef
@@ -107,6 +115,7 @@ define void @i16() {
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i16 undef, 2
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i16 undef, 2
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i16 undef, 2
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'i16'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i16 undef, undef
@@ -117,6 +126,7 @@ define void @i16() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i16 undef, 2
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i16 undef, 2
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i16 undef, 2
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %1 = sdiv i16 undef, undef
   %2 = udiv i16 undef, undef
@@ -139,6 +149,7 @@ define void @i32() {
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = udiv i32 undef, 2
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i32 undef, 2
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i32 undef, 2
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'i32'
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i32 undef, undef
@@ -149,6 +160,7 @@ define void @i32() {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i32 undef, 2
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i32 undef, 2
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i32 undef, 2
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'i32'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i32 undef, undef
@@ -159,6 +171,7 @@ define void @i32() {
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i32 undef, 2
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i32 undef, 2
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i32 undef, 2
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'i32'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i32 undef, undef
@@ -169,6 +182,7 @@ define void @i32() {
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i32 undef, 2
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i32 undef, 2
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i32 undef, 2
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'i32'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i32 undef, undef
@@ -179,6 +193,7 @@ define void @i32() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i32 undef, 2
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i32 undef, 2
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i32 undef, 2
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %1 = sdiv i32 undef, undef
   %2 = udiv i32 undef, undef
@@ -201,6 +216,7 @@ define void @i64() {
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = udiv i64 undef, 2
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i64 undef, 2
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i64 undef, 2
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'i64'
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = sdiv i64 undef, undef
@@ -211,6 +227,7 @@ define void @i64() {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = udiv i64 undef, 2
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i64 undef, 2
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i64 undef, 2
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'i64'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = sdiv i64 undef, undef
@@ -221,6 +238,7 @@ define void @i64() {
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = udiv i64 undef, 2
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i64 undef, 2
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i64 undef, 2
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'i64'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = sdiv i64 undef, undef
@@ -231,6 +249,7 @@ define void @i64() {
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = udiv i64 undef, 2
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i64 undef, 2
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i64 undef, 2
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'i64'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = sdiv i64 undef, undef
@@ -241,6 +260,7 @@ define void @i64() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = udiv i64 undef, 2
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i64 undef, 2
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i64 undef, 2
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %1 = sdiv i64 undef, undef
   %2 = udiv i64 undef, undef
@@ -259,30 +279,35 @@ define void @f16() {
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem half undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = fdiv half undef, 0xH4000
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = frem half undef, 0xH4000
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'f16'
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fdiv half undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = frem half undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fdiv half undef, 0xH4000
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = frem half undef, 0xH4000
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'f16'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fdiv half undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = frem half undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fdiv half undef, 0xH4000
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = frem half undef, 0xH4000
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'f16'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fdiv half undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = frem half undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fdiv half undef, 0xH4000
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = frem half undef, 0xH4000
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'f16'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv half undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem half undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = fdiv half undef, 0xH4000
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = frem half undef, 0xH4000
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %1 = fdiv half undef, undef
   %2 = frem half undef, undef
@@ -297,30 +322,35 @@ define void @f32() {
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem float undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = fdiv float undef, 2.000000e+00
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = frem float undef, 2.000000e+00
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'f32'
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fdiv float undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = frem float undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fdiv float undef, 2.000000e+00
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = frem float undef, 2.000000e+00
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'f32'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fdiv float undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = frem float undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fdiv float undef, 2.000000e+00
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = frem float undef, 2.000000e+00
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'f32'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fdiv float undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = frem float undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fdiv float undef, 2.000000e+00
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = frem float undef, 2.000000e+00
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'f32'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv float undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem float undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = fdiv float undef, 2.000000e+00
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = frem float undef, 2.000000e+00
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %1 = fdiv float undef, undef
   %2 = frem float undef, undef
@@ -335,30 +365,35 @@ define void @f64() {
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem double undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = fdiv double undef, 2.000000e+00
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = frem double undef, 2.000000e+00
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'f64'
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fdiv double undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = frem double undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fdiv double undef, 2.000000e+00
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = frem double undef, 2.000000e+00
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'f64'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv double undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem double undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = fdiv double undef, 2.000000e+00
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = frem double undef, 2.000000e+00
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'f64'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv double undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem double undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = fdiv double undef, 2.000000e+00
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = frem double undef, 2.000000e+00
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'f64'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv double undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem double undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = fdiv double undef, 2.000000e+00
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = frem double undef, 2.000000e+00
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %1 = fdiv double undef, undef
   %2 = frem double undef, undef
@@ -385,6 +420,7 @@ define void @vi8() {
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi8'
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t1 = sdiv <2 x i8> undef, undef
@@ -403,6 +439,7 @@ define void @vi8() {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 272 for instruction: %s2 = udiv <16 x i8> undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i8> undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i8> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi8'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i8> undef, undef
@@ -421,6 +458,7 @@ define void @vi8() {
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i8> undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i8> undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i8> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi8'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i8> undef, undef
@@ -439,6 +477,7 @@ define void @vi8() {
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i8> undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i8> undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i8> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi8'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i8> undef, undef
@@ -457,6 +496,7 @@ define void @vi8() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %t1 = sdiv <2 x i8> undef, undef
   %t2 = udiv <2 x i8> undef, undef
@@ -495,6 +535,7 @@ define void @vi16() {
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi16'
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t1 = sdiv <2 x i16> undef, undef
@@ -513,6 +554,7 @@ define void @vi16() {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 272 for instruction: %s2 = udiv <16 x i16> undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i16> undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i16> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi16'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i16> undef, undef
@@ -531,6 +573,7 @@ define void @vi16() {
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i16> undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i16> undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i16> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi16'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i16> undef, undef
@@ -549,6 +592,7 @@ define void @vi16() {
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i16> undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i16> undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i16> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi16'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i16> undef, undef
@@ -567,6 +611,7 @@ define void @vi16() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %t1 = sdiv <2 x i16> undef, undef
   %t2 = udiv <2 x i16> undef, undef
@@ -605,6 +650,7 @@ define void @vi32() {
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi32'
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t1 = sdiv <2 x i32> undef, undef
@@ -623,6 +669,7 @@ define void @vi32() {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 272 for instruction: %s2 = udiv <16 x i32> undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i32> undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i32> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi32'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i32> undef, undef
@@ -641,6 +688,7 @@ define void @vi32() {
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i32> undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i32> undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i32> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi32'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i32> undef, undef
@@ -659,6 +707,7 @@ define void @vi32() {
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i32> undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i32> undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i32> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi32'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i32> undef, undef
@@ -677,6 +726,7 @@ define void @vi32() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %t1 = sdiv <2 x i32> undef, undef
   %t2 = udiv <2 x i32> undef, undef
@@ -715,6 +765,7 @@ define void @vi64() {
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi64'
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t1 = sdiv <2 x i64> undef, undef
@@ -733,6 +784,7 @@ define void @vi64() {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %s2 = udiv <16 x i64> undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i64> undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i64> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi64'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t1 = sdiv <2 x i64> undef, undef
@@ -751,6 +803,7 @@ define void @vi64() {
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s2 = udiv <16 x i64> undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s3 = srem <16 x i64> undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s4 = urem <16 x i64> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi64'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t1 = sdiv <2 x i64> undef, undef
@@ -769,6 +822,7 @@ define void @vi64() {
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s2 = udiv <16 x i64> undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s3 = srem <16 x i64> undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s4 = urem <16 x i64> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi64'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i64> undef, undef
@@ -787,6 +841,7 @@ define void @vi64() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %t1 = sdiv <2 x i64> undef, undef
   %t2 = udiv <2 x i64> undef, undef
@@ -815,6 +870,7 @@ define void @vf16() {
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = fdiv <4 x half> undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = fdiv <8 x half> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf16'
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %1 = fdiv <2 x half> undef, undef
@@ -823,6 +879,7 @@ define void @vf16() {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %4 = fdiv <4 x half> undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %5 = fdiv <8 x half> undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %6 = fdiv <8 x half> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vf16'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x half> undef, undef
@@ -831,6 +888,7 @@ define void @vf16() {
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = fdiv <4 x half> undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x half> undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = fdiv <8 x half> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vf16'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x half> undef, undef
@@ -839,6 +897,7 @@ define void @vf16() {
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = fdiv <4 x half> undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x half> undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = fdiv <8 x half> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vf16'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, undef
@@ -847,6 +906,7 @@ define void @vf16() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = fdiv <4 x half> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = fdiv <8 x half> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %1 = fdiv <2 x half> undef, undef
   %2 = fdiv <2 x half> undef, undef
@@ -865,6 +925,7 @@ define void @vf32() {
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %4 = fdiv <4 x float> undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %6 = fdiv <8 x float> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf32'
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %1 = fdiv <2 x float> undef, undef
@@ -873,6 +934,7 @@ define void @vf32() {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %4 = fdiv <4 x float> undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %5 = fdiv <8 x float> undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %6 = fdiv <8 x float> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vf32'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x float> undef, undef
@@ -881,6 +943,7 @@ define void @vf32() {
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = fdiv <4 x float> undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x float> undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = fdiv <8 x float> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vf32'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x float> undef, undef
@@ -889,6 +952,7 @@ define void @vf32() {
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = fdiv <4 x float> undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x float> undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = fdiv <8 x float> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vf32'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = fdiv <2 x float> undef, undef
@@ -897,6 +961,7 @@ define void @vf32() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %4 = fdiv <4 x float> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %6 = fdiv <8 x float> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %1 = fdiv <2 x float> undef, undef
   %2 = fdiv <2 x float> undef, undef
@@ -915,6 +980,7 @@ define void @vf64() {
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %4 = fdiv <4 x double> undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef, undef
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %6 = fdiv <8 x double> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf64'
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %1 = fdiv <2 x double> undef, undef
@@ -923,6 +989,7 @@ define void @vf64() {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %4 = fdiv <4 x double> undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %5 = fdiv <8 x double> undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %6 = fdiv <8 x double> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vf64'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, undef
@@ -931,6 +998,7 @@ define void @vf64() {
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = fdiv <4 x double> undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, undef
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = fdiv <8 x double> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vf64'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, undef
@@ -939,6 +1007,7 @@ define void @vf64() {
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = fdiv <4 x double> undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, undef
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = fdiv <8 x double> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vf64'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = fdiv <2 x double> undef, undef
@@ -947,6 +1016,7 @@ define void @vf64() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %4 = fdiv <4 x double> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef, undef
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %6 = fdiv <8 x double> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %1 = fdiv <2 x double> undef, undef
   %2 = fdiv <2 x double> undef, undef
@@ -975,6 +1045,7 @@ define void @vi8_2() {
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi8_2'
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t1 = sdiv <2 x i8> undef, <i8 2, i8 2>
@@ -993,6 +1064,7 @@ define void @vi8_2() {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 272 for instruction: %s2 = udiv <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi8_2'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i8> undef, <i8 2, i8 2>
@@ -1011,6 +1083,7 @@ define void @vi8_2() {
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi8_2'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i8> undef, <i8 2, i8 2>
@@ -1029,6 +1102,7 @@ define void @vi8_2() {
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi8_2'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i8> undef, <i8 2, i8 2>
@@ -1047,6 +1121,7 @@ define void @vi8_2() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %t1 = sdiv <2 x i8> undef, <i8 2, i8 2>
   %t2 = udiv <2 x i8> undef, <i8 2, i8 2>
@@ -1085,6 +1160,7 @@ define void @vi16_2() {
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi16_2'
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t1 = sdiv <2 x i16> undef, <i16 2, i16 2>
@@ -1103,6 +1179,7 @@ define void @vi16_2() {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 272 for instruction: %s2 = udiv <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi16_2'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i16> undef, <i16 2, i16 2>
@@ -1121,6 +1198,7 @@ define void @vi16_2() {
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi16_2'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i16> undef, <i16 2, i16 2>
@@ -1139,6 +1217,7 @@ define void @vi16_2() {
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi16_2'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i16> undef, <i16 2, i16 2>
@@ -1157,6 +1236,7 @@ define void @vi16_2() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %t1 = sdiv <2 x i16> undef, <i16 2, i16 2>
   %t2 = udiv <2 x i16> undef, <i16 2, i16 2>
@@ -1195,6 +1275,7 @@ define void @vi32_2() {
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi32_2'
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t1 = sdiv <2 x i32> undef, <i32 2, i32 2>
@@ -1213,6 +1294,7 @@ define void @vi32_2() {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 272 for instruction: %s2 = udiv <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi32_2'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i32> undef, <i32 2, i32 2>
@@ -1231,6 +1313,7 @@ define void @vi32_2() {
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi32_2'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i32> undef, <i32 2, i32 2>
@@ -1249,6 +1332,7 @@ define void @vi32_2() {
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi32_2'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i32> undef, <i32 2, i32 2>
@@ -1267,6 +1351,7 @@ define void @vi32_2() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %t1 = sdiv <2 x i32> undef, <i32 2, i32 2>
   %t2 = udiv <2 x i32> undef, <i32 2, i32 2>
@@ -1305,6 +1390,7 @@ define void @vi64_2() {
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi64_2'
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t1 = sdiv <2 x i64> undef, <i64 2, i64 2>
@@ -1323,6 +1409,7 @@ define void @vi64_2() {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %s2 = udiv <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi64_2'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t1 = sdiv <2 x i64> undef, <i64 2, i64 2>
@@ -1341,6 +1428,7 @@ define void @vi64_2() {
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s2 = udiv <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s3 = srem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s4 = urem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi64_2'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t1 = sdiv <2 x i64> undef, <i64 2, i64 2>
@@ -1359,6 +1447,7 @@ define void @vi64_2() {
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s2 = udiv <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s3 = srem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s4 = urem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi64_2'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i64> undef, <i64 2, i64 2>
@@ -1377,6 +1466,7 @@ define void @vi64_2() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %t1 = sdiv <2 x i64> undef, <i64 2, i64 2>
   %t2 = udiv <2 x i64> undef, <i64 2, i64 2>
@@ -1405,6 +1495,7 @@ define void @vf16_2() {
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = fdiv <4 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = fdiv <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf16_2'
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %1 = fdiv <2 x half> undef, <half 0xH4000, half 0xH4000>
@@ -1413,6 +1504,7 @@ define void @vf16_2() {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %4 = fdiv <4 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %5 = fdiv <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %6 = fdiv <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vf16_2'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x half> undef, <half 0xH4000, half 0xH4000>
@@ -1421,6 +1513,7 @@ define void @vf16_2() {
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = fdiv <4 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = fdiv <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vf16_2'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x half> undef, <half 0xH4000, half 0xH4000>
@@ -1429,6 +1522,7 @@ define void @vf16_2() {
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = fdiv <4 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = fdiv <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vf16_2'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, <half 0xH4000, half 0xH4000>
@@ -1437,6 +1531,7 @@ define void @vf16_2() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = fdiv <4 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = fdiv <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %1 = fdiv <2 x half> undef, <half 2., half 2.>
   %2 = fdiv <2 x half> undef, <half 2., half 2.>
@@ -1455,6 +1550,7 @@ define void @vf32_2() {
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %4 = fdiv <4 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %6 = fdiv <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf32_2'
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %1 = fdiv <2 x float> undef, <float 2.000000e+00, float 2.000000e+00>
@@ -1463,6 +1559,7 @@ define void @vf32_2() {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %4 = fdiv <4 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %5 = fdiv <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %6 = fdiv <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vf32_2'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x float> undef, <float 2.000000e+00, float 2.000000e+00>
@@ -1471,6 +1568,7 @@ define void @vf32_2() {
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = fdiv <4 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = fdiv <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vf32_2'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x float> undef, <float 2.000000e+00, float 2.000000e+00>
@@ -1479,6 +1577,7 @@ define void @vf32_2() {
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = fdiv <4 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = fdiv <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vf32_2'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = fdiv <2 x float> undef, <float 2.000000e+00, float 2.000000e+00>
@@ -1487,6 +1586,7 @@ define void @vf32_2() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %4 = fdiv <4 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %6 = fdiv <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %1 = fdiv <2 x float> undef, <float 2., float 2.>
   %2 = fdiv <2 x float> undef, <float 2., float 2.>
@@ -1505,6 +1605,7 @@ define void @vf64_2() {
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %4 = fdiv <4 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %6 = fdiv <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf64_2'
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %1 = fdiv <2 x double> undef, <double 2.000000e+00, double 2.000000e+00>
@@ -1513,6 +1614,7 @@ define void @vf64_2() {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %4 = fdiv <4 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %5 = fdiv <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %6 = fdiv <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vf64_2'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, <double 2.000000e+00, double 2.000000e+00>
@@ -1521,6 +1623,7 @@ define void @vf64_2() {
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = fdiv <4 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = fdiv <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vf64_2'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, <double 2.000000e+00, double 2.000000e+00>
@@ -1529,6 +1632,7 @@ define void @vf64_2() {
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = fdiv <4 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = fdiv <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vf64_2'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = fdiv <2 x double> undef, <double 2.000000e+00, double 2.000000e+00>
@@ -1537,6 +1641,7 @@ define void @vf64_2() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %4 = fdiv <4 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %6 = fdiv <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %1 = fdiv <2 x double> undef, <double 2., double 2.>
   %2 = fdiv <2 x double> undef, <double 2., double 2.>

diff  --git a/llvm/test/Analysis/CostModel/ARM/fparith.ll b/llvm/test/Analysis/CostModel/ARM/fparith.ll
index 31a73c1868e5..cb3d66edfa20 100644
--- a/llvm/test/Analysis/CostModel/ARM/fparith.ll
+++ b/llvm/test/Analysis/CostModel/ARM/fparith.ll
@@ -7,13 +7,13 @@ define void @f32() {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = fadd float undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = fsub float undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fmul float undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'f32'
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = fadd float undef, undef
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = fsub float undef, undef
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fmul float undef, undef
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %c = fadd float undef, undef
   %d = fsub float undef, undef
@@ -26,13 +26,13 @@ define void @f16() {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = fadd half undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = fsub half undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fmul half undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'f16'
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = fadd half undef, undef
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = fsub half undef, undef
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fmul half undef, undef
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %c = fadd half undef, undef
   %d = fsub half undef, undef
@@ -45,13 +45,13 @@ define void @f64() {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = fadd double undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = fsub double undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fmul double undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'f64'
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = fadd double undef, undef
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = fsub double undef, undef
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fmul double undef, undef
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %c = fadd double undef, undef
   %d = fsub double undef, undef
@@ -70,7 +70,7 @@ define void @vf32() {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %c8 = fadd <8 x float> undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %d8 = fsub <8 x float> undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %e8 = fmul <8 x float> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'vf32'
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %c2 = fadd <2 x float> undef, undef
@@ -82,7 +82,7 @@ define void @vf32() {
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %c8 = fadd <8 x float> undef, undef
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %d8 = fsub <8 x float> undef, undef
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %e8 = fmul <8 x float> undef, undef
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %c2 = fadd <2 x float> undef, undef
   %d2 = fsub <2 x float> undef, undef
@@ -107,7 +107,7 @@ define void @vf16() {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %c8 = fadd <8 x half> undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %d8 = fsub <8 x half> undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %e8 = fmul <8 x half> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'vf16'
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %c2 = fadd <2 x half> undef, undef
@@ -119,7 +119,7 @@ define void @vf16() {
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %c8 = fadd <8 x half> undef, undef
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %d8 = fsub <8 x half> undef, undef
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e8 = fmul <8 x half> undef, undef
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %c2 = fadd <2 x half> undef, undef
   %d2 = fsub <2 x half> undef, undef
@@ -144,7 +144,7 @@ define void @vf64() {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %c8 = fadd <8 x double> undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %d8 = fsub <8 x double> undef, undef
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %e8 = fmul <8 x double> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'vf64'
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %c2 = fadd <2 x double> undef, undef
@@ -156,7 +156,7 @@ define void @vf64() {
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %c8 = fadd <8 x double> undef, undef
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %d8 = fsub <8 x double> undef, undef
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %e8 = fmul <8 x double> undef, undef
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %c2 = fadd <2 x double> undef, undef
   %d2 = fsub <2 x double> undef, undef

diff  --git a/llvm/test/Analysis/CostModel/ARM/gep.ll b/llvm/test/Analysis/CostModel/ARM/gep.ll
index e5ab9ef7a0e1..3956c2bb1561 100644
--- a/llvm/test/Analysis/CostModel/ARM/gep.ll
+++ b/llvm/test/Analysis/CostModel/ARM/gep.ll
@@ -21,18 +21,57 @@ define void @testi8(i8* %a, i32 %i) {
 ; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, i8* %a, i32 %i
 ; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
-; CHECK-V7M-LABEL: 'testi8'
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* %a, i32 0
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, i8* %a, i32 1
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, i8* %a, i32 -1
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, i8* %a, i32 31
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, i8* %a, i32 32
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, i8* %a, i32 4095
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, i8* %a, i32 4096
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, i8* %a, i32 -255
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, i8* %a, i32 -256
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, i8* %a, i32 %i
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V7M-NOFP-LABEL: 'testi8'
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* %a, i32 0
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, i8* %a, i32 1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, i8* %a, i32 -1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, i8* %a, i32 31
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, i8* %a, i32 32
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, i8* %a, i32 4095
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, i8* %a, i32 4096
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, i8* %a, i32 -255
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, i8* %a, i32 -256
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, i8* %a, i32 %i
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-V7M-FP-LABEL: 'testi8'
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* %a, i32 0
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, i8* %a, i32 1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, i8* %a, i32 -1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, i8* %a, i32 31
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, i8* %a, i32 32
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, i8* %a, i32 4095
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, i8* %a, i32 4096
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, i8* %a, i32 -255
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, i8* %a, i32 -256
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, i8* %a, i32 %i
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-MVE-LABEL: 'testi8'
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* %a, i32 0
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, i8* %a, i32 1
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, i8* %a, i32 -1
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, i8* %a, i32 31
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, i8* %a, i32 32
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, i8* %a, i32 4095
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, i8* %a, i32 4096
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, i8* %a, i32 -255
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, i8* %a, i32 -256
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, i8* %a, i32 %i
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEFP-LABEL: 'testi8'
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* %a, i32 0
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, i8* %a, i32 1
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, i8* %a, i32 -1
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, i8* %a, i32 31
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, i8* %a, i32 32
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, i8* %a, i32 4095
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, i8* %a, i32 4096
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, i8* %a, i32 -255
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, i8* %a, i32 -256
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, i8* %a, i32 %i
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-T32-LABEL: 'testi8'
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* %a, i32 0
@@ -45,7 +84,7 @@ define void @testi8(i8* %a, i32 %i) {
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, i8* %a, i32 -255
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, i8* %a, i32 -256
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, i8* %a, i32 %i
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-A32-LABEL: 'testi8'
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* %a, i32 0
@@ -58,7 +97,7 @@ define void @testi8(i8* %a, i32 %i) {
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, i8* %a, i32 -255
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i8, i8* %a, i32 -256
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, i8* %a, i32 %i
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %a0 = getelementptr inbounds i8, i8* %a, i32 0
   %a1 = getelementptr inbounds i8, i8* %a, i32 1
@@ -88,18 +127,57 @@ define void @testi16(i16* %a, i32 %i) {
 ; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i16, i16* %a, i32 %i
 ; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
-; CHECK-V7M-LABEL: 'testi16'
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i16, i16* %a, i32 0
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, i16* %a, i32 1
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, i16* %a, i32 -1
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, i16* %a, i32 31
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, i16* %a, i32 32
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, i16* %a, i32 2046
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, i16* %a, i32 2048
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, i16* %a, i32 -127
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, i16* %a, i32 -128
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, i16* %a, i32 %i
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V7M-NOFP-LABEL: 'testi16'
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i16, i16* %a, i32 0
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, i16* %a, i32 1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, i16* %a, i32 -1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, i16* %a, i32 31
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, i16* %a, i32 32
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, i16* %a, i32 2046
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, i16* %a, i32 2048
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, i16* %a, i32 -127
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, i16* %a, i32 -128
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, i16* %a, i32 %i
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-V7M-FP-LABEL: 'testi16'
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i16, i16* %a, i32 0
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, i16* %a, i32 1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, i16* %a, i32 -1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, i16* %a, i32 31
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, i16* %a, i32 32
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, i16* %a, i32 2046
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, i16* %a, i32 2048
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, i16* %a, i32 -127
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, i16* %a, i32 -128
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, i16* %a, i32 %i
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-MVE-LABEL: 'testi16'
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i16, i16* %a, i32 0
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, i16* %a, i32 1
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, i16* %a, i32 -1
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, i16* %a, i32 31
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, i16* %a, i32 32
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, i16* %a, i32 2046
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, i16* %a, i32 2048
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, i16* %a, i32 -127
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, i16* %a, i32 -128
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, i16* %a, i32 %i
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEFP-LABEL: 'testi16'
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i16, i16* %a, i32 0
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, i16* %a, i32 1
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, i16* %a, i32 -1
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, i16* %a, i32 31
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, i16* %a, i32 32
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, i16* %a, i32 2046
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, i16* %a, i32 2048
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, i16* %a, i32 -127
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, i16* %a, i32 -128
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, i16* %a, i32 %i
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-T32-LABEL: 'testi16'
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i16, i16* %a, i32 0
@@ -112,7 +190,7 @@ define void @testi16(i16* %a, i32 %i) {
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, i16* %a, i32 -127
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, i16* %a, i32 -128
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, i16* %a, i32 %i
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-A32-LABEL: 'testi16'
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i16, i16* %a, i32 0
@@ -125,7 +203,7 @@ define void @testi16(i16* %a, i32 %i) {
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, i16* %a, i32 -127
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, i16* %a, i32 -128
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i16, i16* %a, i32 %i
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %a0 = getelementptr inbounds i16, i16* %a, i32 0
   %a1 = getelementptr inbounds i16, i16* %a, i32 1
@@ -155,18 +233,57 @@ define void @testi32(i32* %a, i32 %i) {
 ; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i32, i32* %a, i32 %i
 ; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
-; CHECK-V7M-LABEL: 'testi32'
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i32, i32* %a, i32 0
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, i32* %a, i32 1
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, i32* %a, i32 -1
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, i32* %a, i32 31
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, i32* %a, i32 32
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, i32* %a, i32 1023
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, i32* %a, i32 1024
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, i32* %a, i32 -63
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, i32* %a, i32 -64
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, i32* %a, i32 %i
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V7M-NOFP-LABEL: 'testi32'
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i32, i32* %a, i32 0
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, i32* %a, i32 1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, i32* %a, i32 -1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, i32* %a, i32 31
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, i32* %a, i32 32
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, i32* %a, i32 1023
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, i32* %a, i32 1024
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, i32* %a, i32 -63
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, i32* %a, i32 -64
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, i32* %a, i32 %i
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-V7M-FP-LABEL: 'testi32'
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i32, i32* %a, i32 0
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, i32* %a, i32 1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, i32* %a, i32 -1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, i32* %a, i32 31
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, i32* %a, i32 32
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, i32* %a, i32 1023
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, i32* %a, i32 1024
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, i32* %a, i32 -63
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, i32* %a, i32 -64
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, i32* %a, i32 %i
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-MVE-LABEL: 'testi32'
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i32, i32* %a, i32 0
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, i32* %a, i32 1
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, i32* %a, i32 -1
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, i32* %a, i32 31
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, i32* %a, i32 32
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, i32* %a, i32 1023
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, i32* %a, i32 1024
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, i32* %a, i32 -63
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, i32* %a, i32 -64
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, i32* %a, i32 %i
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEFP-LABEL: 'testi32'
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i32, i32* %a, i32 0
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, i32* %a, i32 1
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, i32* %a, i32 -1
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, i32* %a, i32 31
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, i32* %a, i32 32
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, i32* %a, i32 1023
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, i32* %a, i32 1024
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, i32* %a, i32 -63
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, i32* %a, i32 -64
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, i32* %a, i32 %i
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-T32-LABEL: 'testi32'
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i32, i32* %a, i32 0
@@ -179,7 +296,7 @@ define void @testi32(i32* %a, i32 %i) {
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, i32* %a, i32 -63
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, i32* %a, i32 -64
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, i32* %a, i32 %i
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-A32-LABEL: 'testi32'
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i32, i32* %a, i32 0
@@ -192,7 +309,7 @@ define void @testi32(i32* %a, i32 %i) {
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, i32* %a, i32 -63
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i32, i32* %a, i32 -64
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, i32* %a, i32 %i
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %a0 = getelementptr inbounds i32, i32* %a, i32 0
   %a1 = getelementptr inbounds i32, i32* %a, i32 1
@@ -224,20 +341,65 @@ define void @testi64(i64* %a, i32 %i) {
 ; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, i64* %a, i32 %i
 ; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
-; CHECK-V7M-LABEL: 'testi64'
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i64, i64* %a, i32 0
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, i64* %a, i32 1
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, i64* %a, i32 -1
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, i64* %a, i32 15
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, i64* %a, i32 16
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, i64* %a, i32 31
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, i64* %a, i32 32
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, i64* %a, i32 1023
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, i64* %a, i32 1024
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, i64* %a, i32 -63
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, i64* %a, i32 -64
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, i64* %a, i32 %i
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V7M-NOFP-LABEL: 'testi64'
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i64, i64* %a, i32 0
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, i64* %a, i32 1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, i64* %a, i32 -1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, i64* %a, i32 15
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, i64* %a, i32 16
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, i64* %a, i32 31
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, i64* %a, i32 32
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, i64* %a, i32 1023
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, i64* %a, i32 1024
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, i64* %a, i32 -63
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, i64* %a, i32 -64
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, i64* %a, i32 %i
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-V7M-FP-LABEL: 'testi64'
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i64, i64* %a, i32 0
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, i64* %a, i32 1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, i64* %a, i32 -1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, i64* %a, i32 15
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, i64* %a, i32 16
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, i64* %a, i32 31
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, i64* %a, i32 32
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, i64* %a, i32 1023
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, i64* %a, i32 1024
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, i64* %a, i32 -63
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, i64* %a, i32 -64
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, i64* %a, i32 %i
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-MVE-LABEL: 'testi64'
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i64, i64* %a, i32 0
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, i64* %a, i32 1
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, i64* %a, i32 -1
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, i64* %a, i32 15
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, i64* %a, i32 16
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, i64* %a, i32 31
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, i64* %a, i32 32
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, i64* %a, i32 1023
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, i64* %a, i32 1024
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, i64* %a, i32 -63
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, i64* %a, i32 -64
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, i64* %a, i32 %i
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEFP-LABEL: 'testi64'
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i64, i64* %a, i32 0
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, i64* %a, i32 1
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, i64* %a, i32 -1
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, i64* %a, i32 15
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, i64* %a, i32 16
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, i64* %a, i32 31
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, i64* %a, i32 32
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, i64* %a, i32 1023
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, i64* %a, i32 1024
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, i64* %a, i32 -63
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, i64* %a, i32 -64
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, i64* %a, i32 %i
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-T32-LABEL: 'testi64'
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i64, i64* %a, i32 0
@@ -252,7 +414,7 @@ define void @testi64(i64* %a, i32 %i) {
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, i64* %a, i32 -63
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, i64* %a, i32 -64
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, i64* %a, i32 %i
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-A32-LABEL: 'testi64'
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i64, i64* %a, i32 0
@@ -267,7 +429,7 @@ define void @testi64(i64* %a, i32 %i) {
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds i64, i64* %a, i32 -63
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i64, i64* %a, i32 -64
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, i64* %a, i32 %i
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %a0 = getelementptr inbounds i64, i64* %a, i32 0
   %a1 = getelementptr inbounds i64, i64* %a, i32 1
@@ -344,7 +506,7 @@ define void @testhalf(half* %a, i32 %i) {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds half, half* %a, i32 -63
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds half, half* %a, i32 -64
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, half* %a, i32 %i
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'testhalf'
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds half, half* %a, i32 0
@@ -359,7 +521,7 @@ define void @testhalf(half* %a, i32 %i) {
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds half, half* %a, i32 -63
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds half, half* %a, i32 -64
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, half* %a, i32 %i
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-T32-LABEL: 'testhalf'
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds half, half* %a, i32 0
@@ -374,7 +536,7 @@ define void @testhalf(half* %a, i32 %i) {
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am63 = getelementptr inbounds half, half* %a, i32 -63
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds half, half* %a, i32 -64
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, half* %a, i32 %i
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-A32-LABEL: 'testhalf'
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds half, half* %a, i32 0
@@ -389,7 +551,7 @@ define void @testhalf(half* %a, i32 %i) {
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am63 = getelementptr inbounds half, half* %a, i32 -63
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am64 = getelementptr inbounds half, half* %a, i32 -64
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, half* %a, i32 %i
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %a0 = getelementptr inbounds half, half* %a, i32 0
   %a1 = getelementptr inbounds half, half* %a, i32 1
@@ -466,7 +628,7 @@ define void @testfloat(float* %a, i32 %i) {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, float* %a, i32 -63
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am64 = getelementptr inbounds float, float* %a, i32 -64
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, float* %a, i32 %i
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'testfloat'
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds float, float* %a, i32 0
@@ -481,7 +643,7 @@ define void @testfloat(float* %a, i32 %i) {
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, float* %a, i32 -63
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds float, float* %a, i32 -64
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, float* %a, i32 %i
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-T32-LABEL: 'testfloat'
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds float, float* %a, i32 0
@@ -496,7 +658,7 @@ define void @testfloat(float* %a, i32 %i) {
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, float* %a, i32 -63
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds float, float* %a, i32 -64
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, float* %a, i32 %i
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-A32-LABEL: 'testfloat'
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds float, float* %a, i32 0
@@ -511,7 +673,7 @@ define void @testfloat(float* %a, i32 %i) {
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, float* %a, i32 -63
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds float, float* %a, i32 -64
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, float* %a, i32 %i
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %a0 = getelementptr inbounds float, float* %a, i32 0
   %a1 = getelementptr inbounds float, float* %a, i32 1
@@ -545,20 +707,65 @@ define void @testdouble(double* %a, i32 %i) {
 ; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, double* %a, i32 %i
 ; CHECK-V6M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
-; CHECK-V7M-LABEL: 'testdouble'
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds double, double* %a, i32 0
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, double* %a, i32 1
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, double* %a, i32 -1
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, double* %a, i32 127
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, double* %a, i32 128
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, double* %a, i32 -127
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, double* %a, i32 -128
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, double* %a, i32 511
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, double* %a, i32 512
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, double* %a, i32 -31
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, double* %a, i32 -32
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, double* %a, i32 %i
-; CHECK-V7M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V7M-NOFP-LABEL: 'testdouble'
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds double, double* %a, i32 0
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, double* %a, i32 1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, double* %a, i32 -1
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, double* %a, i32 127
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, double* %a, i32 128
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, double* %a, i32 -127
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, double* %a, i32 -128
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, double* %a, i32 511
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, double* %a, i32 512
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, double* %a, i32 -31
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, double* %a, i32 -32
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, double* %a, i32 %i
+; CHECK-V7M-NOFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-V7M-FP-LABEL: 'testdouble'
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds double, double* %a, i32 0
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, double* %a, i32 1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, double* %a, i32 -1
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, double* %a, i32 127
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, double* %a, i32 128
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, double* %a, i32 -127
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, double* %a, i32 -128
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, double* %a, i32 511
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, double* %a, i32 512
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, double* %a, i32 -31
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, double* %a, i32 -32
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, double* %a, i32 %i
+; CHECK-V7M-FP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-MVE-LABEL: 'testdouble'
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds double, double* %a, i32 0
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, double* %a, i32 1
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, double* %a, i32 -1
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, double* %a, i32 127
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, double* %a, i32 128
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, double* %a, i32 -127
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, double* %a, i32 -128
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, double* %a, i32 511
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, double* %a, i32 512
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, double* %a, i32 -31
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, double* %a, i32 -32
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, double* %a, i32 %i
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEFP-LABEL: 'testdouble'
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds double, double* %a, i32 0
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, double* %a, i32 1
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, double* %a, i32 -1
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, double* %a, i32 127
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, double* %a, i32 128
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, double* %a, i32 -127
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, double* %a, i32 -128
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, double* %a, i32 511
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, double* %a, i32 512
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, double* %a, i32 -31
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, double* %a, i32 -32
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, double* %a, i32 %i
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-T32-LABEL: 'testdouble'
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds double, double* %a, i32 0
@@ -573,7 +780,7 @@ define void @testdouble(double* %a, i32 %i) {
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, double* %a, i32 -31
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, double* %a, i32 -32
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, double* %a, i32 %i
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-A32-LABEL: 'testdouble'
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds double, double* %a, i32 0
@@ -588,7 +795,7 @@ define void @testdouble(double* %a, i32 %i) {
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, double* %a, i32 -31
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, double* %a, i32 -32
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, double* %a, i32 %i
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %a0 = getelementptr inbounds double, double* %a, i32 0
   %a1 = getelementptr inbounds double, double* %a, i32 1
@@ -845,7 +1052,7 @@ define void @testvecs(i32 %i) {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 %i
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 %i
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, i8* undef, i32 -1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'testvecs'
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
@@ -905,7 +1112,7 @@ define void @testvecs(i32 %i) {
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 %i
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 %i
 ; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, i8* undef, i32 -1
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-T32-LABEL: 'testvecs'
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
@@ -965,7 +1172,7 @@ define void @testvecs(i32 %i) {
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 %i
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 %i
 ; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, i8* undef, i32 -1
-; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-T32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-A32-LABEL: 'testvecs'
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
@@ -1025,7 +1232,7 @@ define void @testvecs(i32 %i) {
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 %i
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 %i
 ; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, i8* undef, i32 -1
-; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-A32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
   %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0

diff  --git a/llvm/test/Analysis/CostModel/ARM/load_store.ll b/llvm/test/Analysis/CostModel/ARM/load_store.ll
index 8d346ea1c807..2ca4acda0fc2 100644
--- a/llvm/test/Analysis/CostModel/ARM/load_store.ll
+++ b/llvm/test/Analysis/CostModel/ARM/load_store.ll
@@ -85,7 +85,7 @@ define void @stores() {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 1
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x float> undef, <4 x float>* undef, align 1
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x double> undef, <2 x double>* undef, align 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-NEON-LABEL: 'stores'
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 4
@@ -111,7 +111,7 @@ define void @stores() {
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 1
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 1
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <2 x double> undef, <2 x double>* undef, align 1
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8-SIZE-LABEL: 'stores'
 ; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 4
@@ -272,7 +272,7 @@ define void @loads() {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %21 = load <8 x i16>, <8 x i16>* undef, align 1
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = load <4 x float>, <4 x float>* undef, align 1
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %23 = load <2 x double>, <2 x double>* undef, align 1
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-NEON-LABEL: 'loads'
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 4
@@ -298,7 +298,7 @@ define void @loads() {
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %21 = load <8 x i16>, <8 x i16>* undef, align 1
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = load <4 x float>, <4 x float>* undef, align 1
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %23 = load <2 x double>, <2 x double>* undef, align 1
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8-SIZE-LABEL: 'loads'
 ; CHECK-V8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 4

diff  --git a/llvm/test/Analysis/CostModel/ARM/memcpy.ll b/llvm/test/Analysis/CostModel/ARM/memcpy.ll
index 4e8717ef0b90..5ebf945a887b 100644
--- a/llvm/test/Analysis/CostModel/ARM/memcpy.ll
+++ b/llvm/test/Analysis/CostModel/ARM/memcpy.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s  -cost-model -analyze -cost-kind=code-size | \
 ; RUN:    FileCheck %s --check-prefixes=COMMON,CHECK-NO-SA
 ; RUN: opt < %s  -cost-model -analyze -cost-kind=code-size -mattr=+strict-align | \
@@ -17,9 +18,9 @@ define void @memcpy_1(i8* %d, i8* %s) {
 ; ldrb r1, [r1]
 ; strb r1, [r0]
 ;
-; COMMON:           function 'memcpy_1'
-; CHECK-NO-SA-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
-; CHECK-SA-NEXT:    cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; COMMON-LABEL: 'memcpy_1'
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 1, i1 false)
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 1, i1 false)
@@ -40,9 +41,13 @@ define void @memcpy_2(i8* %d, i8* %s) {
 ; strb  r1, [r0, #1]
 ; strb  r2, [r0]
 ;
-; COMMON:           function 'memcpy_2'
-; CHECK-NO-SA-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
-; CHECK-SA-NEXT:    cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; CHECK-NO-SA-LABEL: 'memcpy_2'
+; CHECK-NO-SA-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 2, i1 false)
+; CHECK-NO-SA-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-SA-LABEL: 'memcpy_2'
+; CHECK-SA-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 2, i1 false)
+; CHECK-SA-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 2, i1 false)
@@ -67,9 +72,13 @@ define void @memcpy_3(i8* %d, i8* %s) {
 ; strb  r3, [r0, #1]
 ; strb  r2, [r0]
 ;
-; COMMON:           function 'memcpy_3'
-; CHECK-NO-SA-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
-; CHECK-SA-NEXT:    cost of 6 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; CHECK-NO-SA-LABEL: 'memcpy_3'
+; CHECK-NO-SA-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 3, i1 false)
+; CHECK-NO-SA-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-SA-LABEL: 'memcpy_3'
+; CHECK-SA-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 3, i1 false)
+; CHECK-SA-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 3, i1 false)
@@ -94,9 +103,13 @@ define void @memcpy_4(i8* %d, i8* %s) {
 ; strb  r3, [r0, #1]
 ; strb.w  r12, [r0]
 ;
-; COMMON:           function 'memcpy_4'
-; CHECK-NO-SA-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
-; CHECK-SA-NEXT:    cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; CHECK-NO-SA-LABEL: 'memcpy_4'
+; CHECK-NO-SA-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 4, i1 false)
+; CHECK-NO-SA-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-SA-LABEL: 'memcpy_4'
+; CHECK-SA-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 4, i1 false)
+; CHECK-SA-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 4, i1 false)
@@ -119,8 +132,9 @@ define void @memcpy_8(i8* %d, i8* %s) {
 ; bl  __aeabi_memcpy
 ; pop {r7, pc}
 ;
-; COMMON:      function 'memcpy_8'
-; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; COMMON-LABEL: 'memcpy_8'
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 8, i1 false)
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 8, i1 false)
@@ -147,9 +161,13 @@ define void @memcpy_16(i8* %d, i8* %s) {
 ; bl  __aeabi_memcpy
 ; pop {r7, pc}
 ;
-; COMMON:           function 'memcpy_16'
-; CHECK-NO-SA-NEXT: cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
-; CHECK-SA-NEXT:    cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; CHECK-NO-SA-LABEL: 'memcpy_16'
+; CHECK-NO-SA-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 16, i1 false)
+; CHECK-NO-SA-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-SA-LABEL: 'memcpy_16'
+; CHECK-SA-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 16, i1 false)
+; CHECK-SA-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 16, i1 false)
@@ -163,8 +181,9 @@ define void @memcpy_32(i8* %d, i8* %s, i32 %N) {
 ; movs	r2, #32
 ; bl	__aeabi_memcpy
 ;
-; COMMON:      function 'memcpy_32'
-; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; COMMON-LABEL: 'memcpy_32'
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 32, i1 false)
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 32, i1 false)
@@ -177,8 +196,9 @@ define void @memcpy_N(i8* %d, i8* %s, i32 %N) {
 ;
 ; bl __aeabi_memcpy
 ;
-; COMMON:      function 'memcpy_N'
-; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; COMMON-LABEL: 'memcpy_N'
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 %N, i1 false)
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 %N, i1 false)
@@ -196,8 +216,9 @@ define void @memcpy_1_al2(i8* %d, i8* %s) {
 ; ldrb r1, [r1]
 ; strb r1, [r0]
 ;
-; COMMON:      function 'memcpy_1_al2'
-; COMMON-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; COMMON-LABEL: 'memcpy_1_al2'
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 1, i1 false)
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 1, i1 false)
@@ -211,8 +232,9 @@ define void @memcpy_2_al2(i8* %d, i8* %s) {
 ; ldrh r1, [r1]
 ; strh r1, [r0]
 ;
-; COMMON:      function 'memcpy_2_al2'
-; COMMON-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; COMMON-LABEL: 'memcpy_2_al2'
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 2, i1 false)
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 2, i1 false)
@@ -228,8 +250,9 @@ define void @memcpy_3_al2(i8* %d, i8* %s) {
 ; ldrh r1, [r1]
 ; strh r1, [r0]
 ;
-; COMMON:      function 'memcpy_3_al2'
-; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; COMMON-LABEL: 'memcpy_3_al2'
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 3, i1 false)
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 3, i1 false)
@@ -250,9 +273,13 @@ define void @memcpy_4_al2(i8* %d, i8* %s) {
 ; ldrh  r1, [r1]
 ; strh  r1, [r0]
 ;
-; COMMON:           function 'memcpy_4_al2'
-; CHECK-NO-SA-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
-; CHECK-SA-NEXT:    cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; CHECK-NO-SA-LABEL: 'memcpy_4_al2'
+; CHECK-NO-SA-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 4, i1 false)
+; CHECK-NO-SA-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-SA-LABEL: 'memcpy_4_al2'
+; CHECK-SA-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 4, i1 false)
+; CHECK-SA-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 4, i1 false)
@@ -279,9 +306,13 @@ define void @memcpy_8_al2(i8* %d, i8* %s) {
 ;	ldrh	r1, [r1]
 ;	strh	r1, [r0]
 ;
-; COMMON:           function 'memcpy_8_al2'
-; CHECK-NO-SA-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
-; CHECK-SA-NEXT:    cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; CHECK-NO-SA-LABEL: 'memcpy_8_al2'
+; CHECK-NO-SA-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 8, i1 false)
+; CHECK-NO-SA-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-SA-LABEL: 'memcpy_8_al2'
+; CHECK-SA-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 8, i1 false)
+; CHECK-SA-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 8, i1 false)
@@ -306,9 +337,13 @@ define void @memcpy_16_al2(i8* %d, i8* %s) {
 ;	movs	r2, #16
 ;	bl	__aeabi_memcpy
 ;
-; COMMON:           function 'memcpy_16_al2'
-; CHECK-NO-SA-NEXT: cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
-; CHECK-SA-NEXT:    cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; CHECK-NO-SA-LABEL: 'memcpy_16_al2'
+; CHECK-NO-SA-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 16, i1 false)
+; CHECK-NO-SA-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-SA-LABEL: 'memcpy_16_al2'
+; CHECK-SA-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 16, i1 false)
+; CHECK-SA-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 16, i1 false)
@@ -322,8 +357,9 @@ define void @memcpy_32_al2(i8* %d, i8* %s, i32 %N) {
 ; movs r2, #32
 ; bl __aeabi_memcpy
 ;
-; COMMON:      function 'memcpy_32_al2'
-; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; COMMON-LABEL: 'memcpy_32_al2'
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 32, i1 false)
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 32, i1 false)
@@ -336,8 +372,9 @@ define void @memcpy_N_al2(i8* %d, i8* %s, i32 %N) {
 ;
 ; bl __aeabi_memcpy
 ;
-; COMMON:      function 'memcpy_N_al2'
-; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; COMMON-LABEL: 'memcpy_N_al2'
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 %N, i1 false)
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 %N, i1 false)
@@ -355,8 +392,9 @@ define void @memcpy_1_al4(i8* %d, i8* %s) {
 ; ldrb r1, [r1]
 ; strb r1, [r0]
 ;
-; COMMON:      function 'memcpy_1_al4'
-; COMMON-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; COMMON-LABEL: 'memcpy_1_al4'
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 1, i1 false)
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 1, i1 false)
@@ -370,8 +408,9 @@ define void @memcpy_2_al4(i8* %d, i8* %s) {
 ; ldrh r1, [r1]
 ; strh r1, [r0]
 ;
-; COMMON:      function 'memcpy_2_al4'
-; COMMON-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; COMMON-LABEL: 'memcpy_2_al4'
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 2, i1 false)
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 2, i1 false)
@@ -387,8 +426,9 @@ define void @memcpy_3_al4(i8* %d, i8* %s) {
 ; ldrh r1, [r1]
 ; strh r1, [r0]
 ;
-; COMMON:      function 'memcpy_3_al4'
-; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; COMMON-LABEL: 'memcpy_3_al4'
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 3, i1 false)
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 3, i1 false)
@@ -402,8 +442,9 @@ define void @memcpy_4_al4(i8* %d, i8* %s) {
 ; ldr r1, [r1]
 ; str r1, [r0]
 ;
-; COMMON:      function 'memcpy_4_al4'
-; COMMON-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; COMMON-LABEL: 'memcpy_4_al4'
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 4, i1 false)
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 4, i1 false)
@@ -417,8 +458,9 @@ define void @memcpy_8_al4(i8* %d, i8* %s) {
 ; ldrd r2, r1, [r1]
 ; strd r2, r1, [r0]
 ;
-; COMMON:      function 'memcpy_8_al4'
-; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; COMMON-LABEL: 'memcpy_8_al4'
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 8, i1 false)
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 8, i1 false)
@@ -434,8 +476,9 @@ define void @memcpy_16_al4(i8* %d, i8* %s) {
 ; stm.w  r0, {r2, r3, r12}
 ; str    r1, [r0, #12]
 ;
-; COMMON:      function 'memcpy_16_al4'
-; COMMON-NEXT: cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; COMMON-LABEL: 'memcpy_16_al4'
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 16, i1 false)
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 16, i1 false)
@@ -451,8 +494,9 @@ define void @memcpy_32_al4(i8* %d, i8* %s, i32 %N) {
 ; ldm.w  r1, {r2, r3, r12, lr}
 ; stm.w  r0, {r2, r3, r12, lr}
 ;
-; COMMON:      function 'memcpy_32_al4'
-; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; COMMON-LABEL: 'memcpy_32_al4'
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 32, i1 false)
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 32, i1 false)
@@ -465,8 +509,9 @@ define void @memcpy_N_al4(i8* %d, i8* %s, i32 %N) {
 ;
 ; bl  __aeabi_memcpy4
 ;
-; COMMON:      function 'memcpy_N_al4'
-; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; COMMON-LABEL: 'memcpy_N_al4'
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 %N, i1 false)
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 %N, i1 false)
@@ -484,8 +529,9 @@ define void @memcpy_1_al14(i8* %d, i8* %s) {
 ; ldrb r1, [r1]
 ; strb r1, [r0]
 ;
-; COMMON:       function 'memcpy_1_al14'
-; COMMON-NEXT:  cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; COMMON-LABEL: 'memcpy_1_al14'
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 1, i1 false)
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 1, i1 false)
@@ -506,9 +552,13 @@ define void @memcpy_2_al14(i8* %d, i8* %s) {
 ; strb	r1, [r0, #1]
 ; strb	r2, [r0]
 ;
-; COMMON:           function 'memcpy_2_al14'
-; CHECK-NO-SA-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
-; CHECK-SA-NEXT:    cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; CHECK-NO-SA-LABEL: 'memcpy_2_al14'
+; CHECK-NO-SA-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 2, i1 false)
+; CHECK-NO-SA-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-SA-LABEL: 'memcpy_2_al14'
+; CHECK-SA-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 2, i1 false)
+; CHECK-SA-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 2, i1 false)
@@ -533,9 +583,13 @@ define void @memcpy_3_al14(i8* %d, i8* %s) {
 ; strb	r3, [r0, #1]
 ; strb	r2, [r0]
 ;
-; COMMON:           function 'memcpy_3_al14'
-; CHECK-NO-SA-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
-; CHECK-SA-NEXT:    cost of 6 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; CHECK-NO-SA-LABEL: 'memcpy_3_al14'
+; CHECK-NO-SA-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 3, i1 false)
+; CHECK-NO-SA-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-SA-LABEL: 'memcpy_3_al14'
+; CHECK-SA-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 3, i1 false)
+; CHECK-SA-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 3, i1 false)
@@ -560,9 +614,13 @@ define void @memcpy_4_al14(i8* %d, i8* %s) {
 ; strb	r3, [r0, #1]
 ; strb.w	r12, [r0]
 ;
-; COMMON:      function 'memcpy_4_al14'
-; CHECK-NO-SA-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
-; CHECK-SA-NEXT:    cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; CHECK-NO-SA-LABEL: 'memcpy_4_al14'
+; CHECK-NO-SA-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 4, i1 false)
+; CHECK-NO-SA-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-SA-LABEL: 'memcpy_4_al14'
+; CHECK-SA-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 4, i1 false)
+; CHECK-SA-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 4, i1 false)
@@ -585,8 +643,9 @@ define void @memcpy_8_al14(i8* %d, i8* %s) {
 ; bl  __aeabi_memcpy
 ; pop {r7, pc}
 ;
-; COMMON:      function 'memcpy_8_al14'
-; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; COMMON-LABEL: 'memcpy_8_al14'
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 8, i1 false)
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 8, i1 false)
@@ -611,9 +670,13 @@ define void @memcpy_16_al14(i8* %d, i8* %s) {
 ;	movs	r2, #16
 ;	bl	__aeabi_memcpy
 ;
-; COMMON:           function 'memcpy_16_al14'
-; CHECK-NO-SA-NEXT: cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
-; CHECK-SA-NEXT:    cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; CHECK-NO-SA-LABEL: 'memcpy_16_al14'
+; CHECK-NO-SA-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 16, i1 false)
+; CHECK-NO-SA-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-SA-LABEL: 'memcpy_16_al14'
+; CHECK-SA-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 16, i1 false)
+; CHECK-SA-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 16, i1 false)
@@ -627,8 +690,9 @@ define void @memcpy_32_al14(i8* %d, i8* %s) {
 ; movs r2, #32
 ; bl   __aeabi_memcpy
 ;
-; COMMON:      function 'memcpy_32_al14'
-; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; COMMON-LABEL: 'memcpy_32_al14'
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 32, i1 false)
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 32, i1 false)
@@ -641,8 +705,9 @@ define void @memcpy_N_al14(i8* %d, i8* %s, i32 %N) {
 ;
 ; bl  __aeabi_memcpy4
 ;
-; COMMON:      function 'memcpy_N_al14'
-; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; COMMON-LABEL: 'memcpy_N_al14'
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 %N, i1 false)
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 %N, i1 false)
@@ -660,8 +725,9 @@ define void @memcpy_1_al41(i8* %d, i8* %s) {
 ; ldrb  r1, [r1]
 ; strb  r1, [r0]
 ;
-; COMMON:       function 'memcpy_1_al41'
-; COMMON-NEXT:  cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32
+; COMMON-LABEL: 'memcpy_1_al41'
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 1 %s, i32 1, i1 false)
+; COMMON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 1 %s, i32 1, i1 false)

diff  --git a/llvm/test/Analysis/CostModel/ARM/mul-cast-vect.ll b/llvm/test/Analysis/CostModel/ARM/mul-cast-vect.ll
index e88fcca1225c..63dcbe998221 100644
--- a/llvm/test/Analysis/CostModel/ARM/mul-cast-vect.ll
+++ b/llvm/test/Analysis/CostModel/ARM/mul-cast-vect.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s  -cost-model -analyze -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s
 ; To see the assembly output: llc -mcpu=cortex-a9 < %s | FileCheck --check-prefix=ASM %s
 ; ASM lines below are only for reference, tests on that direction should go to tests/CodeGen/ARM
@@ -15,13 +16,18 @@ target triple = "armv7--linux-gnueabihf"
 %T464 = type <4 x i64>
 
 define void @direct(%T432* %loadaddr, %T432* %loadaddr2, %T432* %storeaddr) {
-; COST: function 'direct':
+; COST-LABEL: 'direct'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, <4 x i32>* %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, <4 x i32>* %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r3 = mul <4 x i32> %v0, %v1
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, <4 x i32>* %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   %v0 = load %T432, %T432* %loadaddr
 ; ASM: vld1.64
   %v1 = load %T432, %T432* %loadaddr2
 ; ASM: vld1.64
-  %r3 = mul %T432 %v0, %v1 
-; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
+  %r3 = mul %T432 %v0, %v1
 ; ASM: vmul.i32
   store %T432 %r3, %T432* %storeaddr
 ; ASM: vst1.64
@@ -29,16 +35,22 @@ define void @direct(%T432* %loadaddr, %T432* %loadaddr2, %T432* %storeaddr) {
 }
 
 define void @ups1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
-; COST: function 'ups1632':
+; COST-LABEL: 'ups1632'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, <4 x i16>* %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, <4 x i16>* %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r1 = sext <4 x i16> %v0 to <4 x i32>
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext <4 x i16> %v1 to <4 x i32>
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r3 = mul <4 x i32> %r1, %r2
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, <4 x i32>* %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   %v0 = load %T416, %T416* %loadaddr
 ; ASM: vldr
   %v1 = load %T416, %T416* %loadaddr2
 ; ASM: vldr
   %r1 = sext %T416 %v0 to %T432
   %r2 = sext %T416 %v1 to %T432
-; COST: cost of 0 for instruction: {{.*}} sext <4 x i16> {{.*}} to <4 x i32>
-  %r3 = mul %T432 %r1, %r2 
-; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
+  %r3 = mul %T432 %r1, %r2
 ; ASM: vmull.s16
   store %T432 %r3, %T432* %storeaddr
 ; ASM: vst1.64
@@ -46,16 +58,22 @@ define void @ups1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
 }
 
 define void @upu1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
-; COST: function 'upu1632':
+; COST-LABEL: 'upu1632'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, <4 x i16>* %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, <4 x i16>* %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext <4 x i16> %v0 to <4 x i32>
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r2 = zext <4 x i16> %v1 to <4 x i32>
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r3 = mul <4 x i32> %r1, %r2
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, <4 x i32>* %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   %v0 = load %T416, %T416* %loadaddr
 ; ASM: vldr
   %v1 = load %T416, %T416* %loadaddr2
 ; ASM: vldr
   %r1 = zext %T416 %v0 to %T432
   %r2 = zext %T416 %v1 to %T432
-; COST: cost of 0 for instruction: {{.*}} zext <4 x i16> {{.*}} to <4 x i32>
-  %r3 = mul %T432 %r1, %r2 
-; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
+  %r3 = mul %T432 %r1, %r2
 ; ASM: vmull.u16
   store %T432 %r3, %T432* %storeaddr
 ; ASM: vst1.64
@@ -63,51 +81,66 @@ define void @upu1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
 }
 
 define void @ups3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
-; COST: function 'ups3264':
+; COST-LABEL: 'ups3264'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, <2 x i32>* %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, <2 x i32>* %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r3 = mul <2 x i32> %v0, %v1
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %st = sext <2 x i32> %r3 to <2 x i64>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, <2 x i64>* %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   %v0 = load %T232, %T232* %loadaddr
 ; ASM: vldr
   %v1 = load %T232, %T232* %loadaddr2
 ; ASM: vldr
-  %r3 = mul %T232 %v0, %v1 
+  %r3 = mul %T232 %v0, %v1
 ; ASM: vmul.i32
-; COST: cost of 1 for instruction: {{.*}} mul <2 x i32>
   %st = sext %T232 %r3 to %T264
 ; ASM: vmovl.s32
-; COST: cost of 1 for instruction: {{.*}} sext <2 x i32> {{.*}} to <2 x i64>
   store %T264 %st, %T264* %storeaddr
 ; ASM: vst1.64
   ret void
 }
 
 define void @upu3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
-; COST: function 'upu3264':
+; COST-LABEL: 'upu3264'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, <2 x i32>* %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, <2 x i32>* %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r3 = mul <2 x i32> %v0, %v1
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %st = zext <2 x i32> %r3 to <2 x i64>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, <2 x i64>* %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   %v0 = load %T232, %T232* %loadaddr
 ; ASM: vldr
   %v1 = load %T232, %T232* %loadaddr2
 ; ASM: vldr
-  %r3 = mul %T232 %v0, %v1 
+  %r3 = mul %T232 %v0, %v1
 ; ASM: vmul.i32
-; COST: cost of 1 for instruction: {{.*}} mul <2 x i32>
   %st = zext %T232 %r3 to %T264
 ; ASM: vmovl.u32
-; COST: cost of 1 for instruction: {{.*}} zext <2 x i32> {{.*}} to <2 x i64>
   store %T264 %st, %T264* %storeaddr
 ; ASM: vst1.64
   ret void
 }
 
 define void @dn3216(%T432* %loadaddr, %T432* %loadaddr2, %T416* %storeaddr) {
-; COST: function 'dn3216':
+; COST-LABEL: 'dn3216'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, <4 x i32>* %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, <4 x i32>* %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r3 = mul <4 x i32> %v0, %v1
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %st = trunc <4 x i32> %r3 to <4 x i16>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %st, <4 x i16>* %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   %v0 = load %T432, %T432* %loadaddr
 ; ASM: vld1.64
   %v1 = load %T432, %T432* %loadaddr2
 ; ASM: vld1.64
-  %r3 = mul %T432 %v0, %v1 
+  %r3 = mul %T432 %v0, %v1
 ; ASM: vmul.i32
-; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
   %st = trunc %T432 %r3 to %T416
 ; ASM: vmovn.i32
-; COST: cost of 1 for instruction: {{.*}} trunc <4 x i32> {{.*}} to <4 x i16>
   store %T416 %st, %T416* %storeaddr
 ; ASM: vstr
   ret void

diff  --git a/llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll b/llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll
index 8c5c7415f67f..02bb080b65b6 100644
--- a/llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll
+++ b/llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll
@@ -28,7 +28,7 @@ define i32 @masked_gather() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V4I8 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> undef, i32 1, <4 x i1> undef, <4 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I8 = call <2 x i8> @llvm.masked.gather.v2i8.v2p0i8(<2 x i8*> undef, i32 1, <2 x i1> undef, <2 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
   %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 4, <4 x i1> undef, <4 x double> undef)
   %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 4, <2 x i1> undef, <2 x double> undef)
@@ -92,7 +92,7 @@ define i32 @masked_scatter() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> undef, <4 x i8*> undef, i32 1, <4 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2i8.v2p0i8(<2 x i8> undef, <2 x i8*> undef, i32 1, <2 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
   call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 4, <4 x i1> undef)
   call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 4, <2 x i1> undef)
@@ -153,7 +153,7 @@ define void @gep_v4i32(i32* %base, i16* %base16, i8* %base8, <4 x i32> %ind32, <
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <4 x i16*> %gepbs to <4 x i32*>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %resbs = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gepbsb, i32 4, <4 x i1> %mask, <4 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %resbs, <4 x i32*> %gepbsb, i32 4, <4 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %gep1 = getelementptr i32, i32* %base, <4 x i32> %ind32
   %res1 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep1, i32 4, <4 x i1> %mask, <4 x i32> undef)
@@ -212,7 +212,7 @@ define void @gep_v4f32(float* %base, i16* %base16, i8* %base8, <4 x i32> %ind32,
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <4 x i16*> %gepbs to <4 x float*>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %resbs = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gepbsb, i32 4, <4 x i1> %mask, <4 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %resbs, <4 x float*> %gepbsb, i32 4, <4 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %gep1 = getelementptr float, float* %base, <4 x i32> %ind32
   %res1 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep1, i32 4, <4 x i1> %mask, <4 x float> undef)
@@ -269,7 +269,7 @@ define void @gep_v4i16(i16* %base, <4 x i32> %ind32, <4 x i16> %ind16, <4 x i1>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res6sext = sext <4 x i16> %res6 to <4 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res6trunc = trunc <4 x i32> %res6sext to <4 x i16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> %res6trunc, <4 x i16*> %gep5, i32 4, <4 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %gep1 = getelementptr i16, i16* %base, <4 x i32> %ind32
   %res1 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %gep1, i32 2, <4 x i1> %mask, <4 x i16> undef)
@@ -312,7 +312,7 @@ define void @gep_v4i8(i8* %base, <4 x i8> %ind8, <4 x i1> %mask)  {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res6sext = sext <4 x i8> %res6 to <4 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res6trunc = trunc <4 x i32> %res6sext to <4 x i8>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %res6trunc, <4 x i8*> %gep5, i32 4, <4 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   ; result zext
   %gep5 = getelementptr i8, i8* %base, <4 x i8> %ind8
@@ -361,7 +361,7 @@ define void @gep_v8i16(i16* %base, i8* %base8, i32* %base32, <8 x i32> %ind32, <
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %ressext = sext <8 x i16> %res to <8 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %restrunc = trunc <8 x i32> %ressext to <8 x i16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %restrunc, <8 x i16*> %gep4, i32 4, <8 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   ; no offset ext
   %gep1 = getelementptr i16, i16* %base, <8 x i32> %ind32
@@ -434,7 +434,7 @@ define void @gep_v8f16(half* %base, i8* %base8, i32* %base32, <8 x i32> %ind32,
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <8 x i32*> %gepbs to <8 x half*>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %resbs = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %gepbsb, i32 2, <8 x i1> %mask, <8 x half> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %resbs, <8 x half*> %gepbsb, i32 2, <8 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   ; no offset ext
   %gep1 = getelementptr half, half* %base, <8 x i32> %ind32
@@ -484,7 +484,7 @@ define void @gep_v8i8(i8* %base, <8 x i8> %ind8, <8 x i1> %mask)  {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res6sext = sext <8 x i8> %res6 to <8 x i16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res6trunc = trunc <8 x i16> %res6sext to <8 x i8>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> %res6trunc, <8 x i8*> %gep5, i32 4, <8 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   ; result zext
   %indzext = zext <8 x i8> %ind8 to <8 x i32>
@@ -524,7 +524,7 @@ define void @gep_v16i8(i8* %base, i16* %base16, <16 x i8> %ind8, <16 x i32> %ind
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep4 = getelementptr i8, i8* %base, <16 x i32> %indzext
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %indtrunc = trunc <16 x i32> %ind32 to <16 x i8>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %indtrunc, <16 x i8*> %gep4, i32 2, <16 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   ; no offset ext
   %gep1 = getelementptr i8, i8* %base, <16 x i32> %ind32
@@ -563,7 +563,7 @@ define void @gep_v16i8p(<16 x i8*> %base, i32 %off, <16 x i1> %mask)  {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i8, <16 x i8*> %base, i32 %off
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: %resbs = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %gepbs, i32 2, <16 x i1> %mask, <16 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %resbs, <16 x i8*> %gepbs, i32 2, <16 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %gepbs = getelementptr i8, <16 x i8*> %base, i32 %off
   %resbs = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %gepbs, i32 2, <16 x i1> %mask, <16 x i8> undef)

diff  --git a/llvm/test/Analysis/CostModel/ARM/reduce-add.ll b/llvm/test/Analysis/CostModel/ARM/reduce-add.ll
index 0cd02e3e9c51..a03f283cd1e4 100644
--- a/llvm/test/Analysis/CostModel/ARM/reduce-add.ll
+++ b/llvm/test/Analysis/CostModel/ARM/reduce-add.ll
@@ -21,7 +21,7 @@ define i32 @reduce_i64(i32 %arg) {
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef)
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef)
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 107 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef)
-; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-RECIP-LABEL: 'reduce_i64'
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef)
@@ -29,7 +29,7 @@ define i32 @reduce_i64(i32 %arg) {
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef)
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 202 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef)
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 730 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; V8M-SIZE-LABEL: 'reduce_i64'
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef)
@@ -82,7 +82,7 @@ define i32 @reduce_i32(i32 %arg) {
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 488 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 682 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
 ; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1070 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
-; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-RECIP-LABEL: 'reduce_i32'
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)
@@ -92,7 +92,7 @@ define i32 @reduce_i32(i32 %arg) {
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 5658 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 11806 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
 ; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 36390 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; V8M-SIZE-LABEL: 'reduce_i32'
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)

diff  --git a/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll b/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll
index 562a6fed4b86..8b8ef521764f 100644
--- a/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll
+++ b/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll
@@ -18,7 +18,7 @@ define i32 @reduce_i64(i32 %arg) {
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 178 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i64'
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef)
@@ -26,7 +26,7 @@ define i32 @reduce_i64(i32 %arg) {
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 98 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 282 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 970 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %V1  = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef)
   %V2  = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
@@ -51,7 +51,7 @@ define i32 @reduce_i32(i32 %arg) {
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 237 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i32'
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
@@ -59,7 +59,7 @@ define i32 @reduce_i32(i32 %arg) {
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 632 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 2184 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %V2  = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
   %V4  = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
@@ -86,7 +86,7 @@ define i32 @reduce_i16(i32 %arg) {
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 203 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 303 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 503 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i16'
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
@@ -95,7 +95,7 @@ define i32 @reduce_i16(i32 %arg) {
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 1176 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 2720 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 8880 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %V2  = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
   %V4  = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
@@ -125,7 +125,7 @@ define i32 @reduce_i8(i32 %arg) {
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 493 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 689 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 1081 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i8'
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
@@ -135,7 +135,7 @@ define i32 @reduce_i8(i32 %arg) {
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 5668 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 11820 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 36412 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %V2   = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
   %V4   = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)

diff  --git a/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll b/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll
index 6cae1f1fc312..a39f2ffaf648 100644
--- a/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll
+++ b/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll
@@ -18,7 +18,7 @@ define i32 @reduce_i64(i32 %arg) {
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 178 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i64'
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef)
@@ -26,7 +26,7 @@ define i32 @reduce_i64(i32 %arg) {
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 98 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 282 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 970 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %V1  = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef)
   %V2  = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef)
@@ -51,7 +51,7 @@ define i32 @reduce_i32(i32 %arg) {
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 237 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i32'
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef)
@@ -59,7 +59,7 @@ define i32 @reduce_i32(i32 %arg) {
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 632 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 2184 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %V2  = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef)
   %V4  = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef)
@@ -86,7 +86,7 @@ define i32 @reduce_i16(i32 %arg) {
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 203 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 303 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 503 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i16'
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
@@ -95,7 +95,7 @@ define i32 @reduce_i16(i32 %arg) {
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 1176 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 2720 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 8880 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %V2  = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
   %V4  = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
@@ -125,7 +125,7 @@ define i32 @reduce_i8(i32 %arg) {
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 493 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 689 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 1081 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i8'
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
@@ -135,7 +135,7 @@ define i32 @reduce_i8(i32 %arg) {
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 5668 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 11820 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 36412 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %V2   = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
   %V4   = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)

diff  --git a/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll b/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll
index dac7ab6e5c3a..bb3205ab3336 100644
--- a/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll
+++ b/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll
@@ -18,7 +18,7 @@ define i32 @reduce_i64(i32 %arg) {
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 178 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i64'
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
@@ -26,7 +26,7 @@ define i32 @reduce_i64(i32 %arg) {
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 98 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 282 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 970 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %V1  = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
   %V2  = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
@@ -51,7 +51,7 @@ define i32 @reduce_i32(i32 %arg) {
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 237 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i32'
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
@@ -59,7 +59,7 @@ define i32 @reduce_i32(i32 %arg) {
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 632 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 2184 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %V2  = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
   %V4  = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
@@ -86,7 +86,7 @@ define i32 @reduce_i16(i32 %arg) {
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 203 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 303 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 503 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i16'
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
@@ -95,7 +95,7 @@ define i32 @reduce_i16(i32 %arg) {
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 1176 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 2720 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 8880 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %V2  = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
   %V4  = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
@@ -125,7 +125,7 @@ define i32 @reduce_i8(i32 %arg) {
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 493 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 689 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 1081 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i8'
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
@@ -135,7 +135,7 @@ define i32 @reduce_i8(i32 %arg) {
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 5668 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 11820 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 36412 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %V2   = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
   %V4   = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)

diff  --git a/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll b/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll
index ddcd052853b7..a5e0e56e0465 100644
--- a/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll
+++ b/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll
@@ -18,7 +18,7 @@ define i32 @reduce_i64(i32 %arg) {
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 178 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i64'
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef)
@@ -26,7 +26,7 @@ define i32 @reduce_i64(i32 %arg) {
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 98 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 282 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 970 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %V1  = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef)
   %V2  = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
@@ -51,7 +51,7 @@ define i32 @reduce_i32(i32 %arg) {
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 237 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i32'
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
@@ -59,7 +59,7 @@ define i32 @reduce_i32(i32 %arg) {
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 632 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 2184 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %V2  = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
   %V4  = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
@@ -86,7 +86,7 @@ define i32 @reduce_i16(i32 %arg) {
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 203 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 303 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 503 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i16'
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
@@ -95,7 +95,7 @@ define i32 @reduce_i16(i32 %arg) {
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 1176 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 2720 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 8880 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %V2  = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
   %V4  = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
@@ -125,7 +125,7 @@ define i32 @reduce_i8(i32 %arg) {
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 493 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 689 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
 ; NEON-NEXT:  Cost Model: Found an estimated cost of 1081 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i8'
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
@@ -135,7 +135,7 @@ define i32 @reduce_i8(i32 %arg) {
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 5668 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 11820 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
 ; MVE-NEXT:  Cost Model: Found an estimated cost of 36412 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %V2   = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
   %V4   = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)

diff  --git a/llvm/test/Analysis/CostModel/ARM/select.ll b/llvm/test/Analysis/CostModel/ARM/select.ll
index 32f46b1b53eb..f7e7ae0d1983 100644
--- a/llvm/test/Analysis/CostModel/ARM/select.ll
+++ b/llvm/test/Analysis/CostModel/ARM/select.ll
@@ -39,6 +39,7 @@ define void @selects() {
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
+; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-NEON-RECIP-LABEL: 'selects'
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef
@@ -69,6 +70,7 @@ define void @selects() {
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
+; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-THUMB1-RECIP-LABEL: 'selects'
 ; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef
@@ -99,6 +101,7 @@ define void @selects() {
 ; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef
 ; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef
 ; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
+; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-THUMB2-RECIP-LABEL: 'selects'
 ; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef
@@ -129,6 +132,7 @@ define void @selects() {
 ; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef
 ; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef
 ; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
+; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-MVE-SIZE-LABEL: 'selects'
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef
@@ -159,6 +163,7 @@ define void @selects() {
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-NEON-SIZE-LABEL: 'selects'
 ; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef
@@ -189,6 +194,7 @@ define void @selects() {
 ; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef
 ; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef
 ; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-THUMB1-SIZE-LABEL: 'selects'
 ; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef
@@ -219,6 +225,7 @@ define void @selects() {
 ; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef
 ; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef
 ; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
+; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-THUMB2-SIZE-LABEL: 'selects'
 ; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef
@@ -249,6 +256,7 @@ define void @selects() {
 ; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef
 ; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef
 ; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
+; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %v1 = select i1 undef, i8 undef, i8 undef
   %v2 = select i1 undef, i16 undef, i16 undef

diff  --git a/llvm/test/Analysis/CostModel/ARM/shl-cast-vect.ll b/llvm/test/Analysis/CostModel/ARM/shl-cast-vect.ll
index 02986ea20b4f..d171b4f35fbf 100644
--- a/llvm/test/Analysis/CostModel/ARM/shl-cast-vect.ll
+++ b/llvm/test/Analysis/CostModel/ARM/shl-cast-vect.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s  -cost-model -analyze -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s
 ; To see the assembly output: llc -mcpu=cortex-a9 < %s | FileCheck --check-prefix=ASM %s
 ; ASM lines below are only for reference, tests on that direction should go to tests/CodeGen/ARM
@@ -15,13 +16,18 @@ target triple = "armv7--linux-gnueabihf"
 %T464 = type <4 x i64>
 
 define void @direct(%T432* %loadaddr, %T432* %loadaddr2, %T432* %storeaddr) {
-; COST: function 'direct':
+; COST-LABEL: 'direct'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, <4 x i32>* %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, <4 x i32>* %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <4 x i32> %v0, %v1
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, <4 x i32>* %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   %v0 = load %T432, %T432* %loadaddr
 ; ASM: vld1.64
   %v1 = load %T432, %T432* %loadaddr2
 ; ASM: vld1.64
-  %r3 = shl %T432 %v0, %v1 
-; COST: cost of 2 for instruction: {{.*}} shl <4 x i32>
+  %r3 = shl %T432 %v0, %v1
 ; ASM: vshl.i32
   store %T432 %r3, %T432* %storeaddr
 ; ASM: vst1.64
@@ -29,16 +35,22 @@ define void @direct(%T432* %loadaddr, %T432* %loadaddr2, %T432* %storeaddr) {
 }
 
 define void @ups1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
-; COST: function 'ups1632':
+; COST-LABEL: 'ups1632'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, <4 x i16>* %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, <4 x i16>* %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r1 = sext <4 x i16> %v0 to <4 x i32>
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext <4 x i16> %v1 to <4 x i32>
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <4 x i32> %r1, %r2
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, <4 x i32>* %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   %v0 = load %T416, %T416* %loadaddr
 ; ASM: vldr
   %v1 = load %T416, %T416* %loadaddr2
 ; ASM: vldr
   %r1 = sext %T416 %v0 to %T432
   %r2 = sext %T416 %v1 to %T432
-; COST: cost of 0 for instruction: {{.*}} sext <4 x i16> {{.*}} to <4 x i32>
-  %r3 = shl %T432 %r1, %r2 
-; COST: cost of 2 for instruction: {{.*}} shl <4 x i32>
+  %r3 = shl %T432 %r1, %r2
 ; ASM: vshll.s16
   store %T432 %r3, %T432* %storeaddr
 ; ASM: vst1.64
@@ -46,16 +58,22 @@ define void @ups1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
 }
 
 define void @upu1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
-; COST: function 'upu1632':
+; COST-LABEL: 'upu1632'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, <4 x i16>* %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, <4 x i16>* %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext <4 x i16> %v0 to <4 x i32>
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r2 = zext <4 x i16> %v1 to <4 x i32>
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <4 x i32> %r1, %r2
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, <4 x i32>* %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   %v0 = load %T416, %T416* %loadaddr
 ; ASM: vldr
   %v1 = load %T416, %T416* %loadaddr2
 ; ASM: vldr
   %r1 = zext %T416 %v0 to %T432
   %r2 = zext %T416 %v1 to %T432
-; COST: cost of 0 for instruction: {{.*}} zext <4 x i16> {{.*}} to <4 x i32>
-  %r3 = shl %T432 %r1, %r2 
-; COST: cost of 2 for instruction: {{.*}} shl <4 x i32>
+  %r3 = shl %T432 %r1, %r2
 ; ASM: vshll.u16
   store %T432 %r3, %T432* %storeaddr
 ; ASM: vst1.64
@@ -63,51 +81,66 @@ define void @upu1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
 }
 
 define void @ups3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
-; COST: function 'ups3264':
+; COST-LABEL: 'ups3264'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, <2 x i32>* %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, <2 x i32>* %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <2 x i32> %v0, %v1
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %st = sext <2 x i32> %r3 to <2 x i64>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, <2 x i64>* %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   %v0 = load %T232, %T232* %loadaddr
 ; ASM: vldr
   %v1 = load %T232, %T232* %loadaddr2
 ; ASM: vldr
-  %r3 = shl %T232 %v0, %v1 
+  %r3 = shl %T232 %v0, %v1
 ; ASM: vshl.i32
-; COST: cost of 2 for instruction: {{.*}} shl <2 x i32>
   %st = sext %T232 %r3 to %T264
 ; ASM: vmovl.s32
-; COST: cost of 1 for instruction: {{.*}} sext <2 x i32> {{.*}} to <2 x i64>
   store %T264 %st, %T264* %storeaddr
 ; ASM: vst1.64
   ret void
 }
 
 define void @upu3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
-; COST: function 'upu3264':
+; COST-LABEL: 'upu3264'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, <2 x i32>* %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, <2 x i32>* %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <2 x i32> %v0, %v1
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %st = zext <2 x i32> %r3 to <2 x i64>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, <2 x i64>* %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   %v0 = load %T232, %T232* %loadaddr
 ; ASM: vldr
   %v1 = load %T232, %T232* %loadaddr2
 ; ASM: vldr
-  %r3 = shl %T232 %v0, %v1 
+  %r3 = shl %T232 %v0, %v1
 ; ASM: vshl.i32
-; COST: cost of 2 for instruction: {{.*}} shl <2 x i32>
   %st = zext %T232 %r3 to %T264
 ; ASM: vmovl.u32
-; COST: cost of 1 for instruction: {{.*}} zext <2 x i32> {{.*}} to <2 x i64>
   store %T264 %st, %T264* %storeaddr
 ; ASM: vst1.64
   ret void
 }
 
 define void @dn3216(%T432* %loadaddr, %T432* %loadaddr2, %T416* %storeaddr) {
-; COST: function 'dn3216':
+; COST-LABEL: 'dn3216'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, <4 x i32>* %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, <4 x i32>* %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <4 x i32> %v0, %v1
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %st = trunc <4 x i32> %r3 to <4 x i16>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %st, <4 x i16>* %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   %v0 = load %T432, %T432* %loadaddr
 ; ASM: vld1.64
   %v1 = load %T432, %T432* %loadaddr2
 ; ASM: vld1.64
-  %r3 = shl %T432 %v0, %v1 
+  %r3 = shl %T432 %v0, %v1
 ; ASM: vshl.i32
-; COST: cost of 2 for instruction: {{.*}} shl <4 x i32>
   %st = trunc %T432 %r3 to %T416
 ; ASM: vmovn.i32
-; COST: cost of 1 for instruction: {{.*}} trunc <4 x i32> {{.*}} to <4 x i16>
   store %T416 %st, %T416* %storeaddr
 ; ASM: vstr
   ret void

diff  --git a/llvm/test/Analysis/CostModel/ARM/shuffle.ll b/llvm/test/Analysis/CostModel/ARM/shuffle.ll
index 6a5d0ec306d7..5d9f698f1e5a 100644
--- a/llvm/test/Analysis/CostModel/ARM/shuffle.ll
+++ b/llvm/test/Analysis/CostModel/ARM/shuffle.ll
@@ -16,7 +16,7 @@ define void @broadcast() {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> zeroinitializer
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer
 ; CHECK-MVE-NEXT:  Cost Model: Unknown cost for instruction: %v18 = shufflevector <8 x half> undef, <8 x half> undef, <4 x i32> zeroinitializer
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-NEON-LABEL: 'broadcast'
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer
@@ -31,7 +31,7 @@ define void @broadcast() {
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> zeroinitializer
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer
 ; CHECK-NEON-NEXT:  Cost Model: Unknown cost for instruction: %v18 = shufflevector <8 x half> undef, <8 x half> undef, <4 x i32> zeroinitializer
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer
   %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> zeroinitializer
@@ -67,7 +67,7 @@ define void @reverse() {
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v18 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-NEON-LABEL: 'reverse'
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
@@ -82,7 +82,7 @@ define void @reverse() {
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v18 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
   %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>

diff  --git a/llvm/test/Analysis/CostModel/ARM/sub-cast-vect.ll b/llvm/test/Analysis/CostModel/ARM/sub-cast-vect.ll
index d736fcc0c47a..8b691973405f 100644
--- a/llvm/test/Analysis/CostModel/ARM/sub-cast-vect.ll
+++ b/llvm/test/Analysis/CostModel/ARM/sub-cast-vect.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s  -cost-model -analyze -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s
 ; To see the assembly output: llc -mcpu=cortex-a9 < %s | FileCheck --check-prefix=ASM %s
 ; ASM lines below are only for reference, tests on that direction should go to tests/CodeGen/ARM
@@ -15,13 +16,18 @@ target triple = "armv7--linux-gnueabihf"
 %T464 = type <4 x i64>
 
 define void @direct(%T432* %loadaddr, %T432* %loadaddr2, %T432* %storeaddr) {
-; COST: function 'direct':
+; COST-LABEL: 'direct'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, <4 x i32>* %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, <4 x i32>* %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <4 x i32> %v0, %v1
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, <4 x i32>* %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   %v0 = load %T432, %T432* %loadaddr
 ; ASM: vld1.64
   %v1 = load %T432, %T432* %loadaddr2
 ; ASM: vld1.64
-  %r3 = sub %T432 %v0, %v1 
-; COST: cost of 1 for instruction: {{.*}} sub <4 x i32>
+  %r3 = sub %T432 %v0, %v1
 ; ASM: vsub.i32
   store %T432 %r3, %T432* %storeaddr
 ; ASM: vst1.64
@@ -29,16 +35,22 @@ define void @direct(%T432* %loadaddr, %T432* %loadaddr2, %T432* %storeaddr) {
 }
 
 define void @ups1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
-; COST: function 'ups1632':
+; COST-LABEL: 'ups1632'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, <4 x i16>* %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, <4 x i16>* %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r1 = sext <4 x i16> %v0 to <4 x i32>
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext <4 x i16> %v1 to <4 x i32>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <4 x i32> %r1, %r2
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, <4 x i32>* %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   %v0 = load %T416, %T416* %loadaddr
 ; ASM: vldr
   %v1 = load %T416, %T416* %loadaddr2
 ; ASM: vldr
   %r1 = sext %T416 %v0 to %T432
   %r2 = sext %T416 %v1 to %T432
-; COST: cost of 0 for instruction: {{.*}} sext <4 x i16> {{.*}} to <4 x i32>
-  %r3 = sub %T432 %r1, %r2 
-; COST: cost of 1 for instruction: {{.*}} sub <4 x i32>
+  %r3 = sub %T432 %r1, %r2
 ; ASM: vsubl.s16
   store %T432 %r3, %T432* %storeaddr
 ; ASM: vst1.64
@@ -46,16 +58,22 @@ define void @ups1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
 }
 
 define void @upu1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
-; COST: function 'upu1632':
+; COST-LABEL: 'upu1632'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, <4 x i16>* %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, <4 x i16>* %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext <4 x i16> %v0 to <4 x i32>
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r2 = zext <4 x i16> %v1 to <4 x i32>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <4 x i32> %r1, %r2
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, <4 x i32>* %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   %v0 = load %T416, %T416* %loadaddr
 ; ASM: vldr
   %v1 = load %T416, %T416* %loadaddr2
 ; ASM: vldr
   %r1 = zext %T416 %v0 to %T432
   %r2 = zext %T416 %v1 to %T432
-; COST: cost of 0 for instruction: {{.*}} zext <4 x i16> {{.*}} to <4 x i32>
-  %r3 = sub %T432 %r1, %r2 
-; COST: cost of 1 for instruction: {{.*}} sub <4 x i32>
+  %r3 = sub %T432 %r1, %r2
 ; ASM: vsubl.u16
   store %T432 %r3, %T432* %storeaddr
 ; ASM: vst1.64
@@ -63,51 +81,66 @@ define void @upu1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
 }
 
 define void @ups3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
-; COST: function 'ups3264':
+; COST-LABEL: 'ups3264'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, <2 x i32>* %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, <2 x i32>* %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <2 x i32> %v0, %v1
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %st = sext <2 x i32> %r3 to <2 x i64>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, <2 x i64>* %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   %v0 = load %T232, %T232* %loadaddr
 ; ASM: vldr
   %v1 = load %T232, %T232* %loadaddr2
 ; ASM: vldr
-  %r3 = sub %T232 %v0, %v1 
+  %r3 = sub %T232 %v0, %v1
 ; ASM: vsub.i32
-; COST: cost of 1 for instruction: {{.*}} sub <2 x i32>
   %st = sext %T232 %r3 to %T264
 ; ASM: vmovl.s32
-; COST: cost of 1 for instruction: {{.*}} sext <2 x i32> {{.*}} to <2 x i64>
   store %T264 %st, %T264* %storeaddr
 ; ASM: vst1.64
   ret void
 }
 
 define void @upu3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
-; COST: function 'upu3264':
+; COST-LABEL: 'upu3264'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, <2 x i32>* %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, <2 x i32>* %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <2 x i32> %v0, %v1
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %st = zext <2 x i32> %r3 to <2 x i64>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, <2 x i64>* %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   %v0 = load %T232, %T232* %loadaddr
 ; ASM: vldr
   %v1 = load %T232, %T232* %loadaddr2
 ; ASM: vldr
-  %r3 = sub %T232 %v0, %v1 
+  %r3 = sub %T232 %v0, %v1
 ; ASM: vsub.i32
-; COST: cost of 1 for instruction: {{.*}} sub <2 x i32>
   %st = zext %T232 %r3 to %T264
 ; ASM: vmovl.u32
-; COST: cost of 1 for instruction: {{.*}} zext <2 x i32> {{.*}} to <2 x i64>
   store %T264 %st, %T264* %storeaddr
 ; ASM: vst1.64
   ret void
 }
 
 define void @dn3216(%T432* %loadaddr, %T432* %loadaddr2, %T416* %storeaddr) {
-; COST: function 'dn3216':
+; COST-LABEL: 'dn3216'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, <4 x i32>* %loadaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, <4 x i32>* %loadaddr2, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <4 x i32> %v0, %v1
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %st = trunc <4 x i32> %r3 to <4 x i16>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %st, <4 x i16>* %storeaddr, align 8
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   %v0 = load %T432, %T432* %loadaddr
 ; ASM: vld1.64
   %v1 = load %T432, %T432* %loadaddr2
 ; ASM: vld1.64
-  %r3 = sub %T432 %v0, %v1 
+  %r3 = sub %T432 %v0, %v1
 ; ASM: vsub.i32
-; COST: cost of 1 for instruction: {{.*}} sub <4 x i32>
   %st = trunc %T432 %r3 to %T416
 ; ASM: vmovn.i32
-; COST: cost of 1 for instruction: {{.*}} trunc <4 x i32> {{.*}} to <4 x i16>
   store %T416 %st, %T416* %storeaddr
 ; ASM: vstr
   ret void