[llvm] c40126e - [ARM] remove cost-kind predicate for most math op costs

Tue Nov 3 14:24:43 PST 2020

Author: Sanjay Patel
Date: 2020-11-03T17:23:46-05:00
New Revision: c40126e74017deb7c8bc0b557ad95d2e73df7cc8

URL: https://github.com/llvm/llvm-project/commit/c40126e74017deb7c8bc0b557ad95d2e73df7cc8
DIFF: https://github.com/llvm/llvm-project/commit/c40126e74017deb7c8bc0b557ad95d2e73df7cc8.diff

LOG: [ARM] remove cost-kind predicate for most math op costs

This is based on the same idea that I am using for the basic model implementation
and what I have partly already done for x86: throughput cost is number of
instructions/uops, so size/blended costs are identical except in special cases
(for example, fdiv or other known-expensive machine instructions or things like
MVE that may require cracking into >1 uop).

Differential Revision: https://reviews.llvm.org/D90692

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/ARM/arith.ll
    llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
    llvm/test/Analysis/CostModel/ARM/reduce-add.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
    llvm/test/Transforms/SimplifyCFG/ARM/phi-eliminate.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index cbd28b47d8a3..9fac40a308a4 100644

--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1149,12 +1149,6 @@ int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
     }
   }
 
-  // TODO: Handle more cost kinds.
-  if (CostKind != TTI::TCK_RecipThroughput)
-    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
-                                         Op2Info, Opd1PropInfo,
-                                         Opd2PropInfo, Args, CxtI);
-
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
 
   if (ST->hasNEON()) {
@@ -1249,9 +1243,12 @@ int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
   if (LooksLikeAFreeShift())
     return 0;
 
-  int BaseCost = ST->hasMVEIntegerOps() && Ty->isVectorTy()
-                     ? ST->getMVEVectorCostFactor()
-                     : 1;
+  // Default to cheap (throughput/size of 1 instruction) but adjust throughput
+  // for "multiple beats" potentially needed by MVE instructions.
+  int BaseCost = 1;
+  if (CostKind != TTI::TCK_CodeSize && ST->hasMVEIntegerOps() &&
+      Ty->isVectorTy())
+    BaseCost = ST->getMVEVectorCostFactor();
 
   // The rest of this mostly follows what is done in BaseT::getArithmeticInstrCost,
   // without treating floats as more expensive that scalars or increasing the

diff  --git a/llvm/test/Analysis/CostModel/ARM/arith.ll b/llvm/test/Analysis/CostModel/ARM/arith.ll
index a1a0d11cac2e..cbeaa8e97f06 100644
--- a/llvm/test/Analysis/CostModel/ARM/arith.ll
+++ b/llvm/test/Analysis/CostModel/ARM/arith.ll
@@ -349,15 +349,15 @@ define void @i64() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-SIZE-LABEL: 'i64'
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = add i64 undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = sub i64 undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = mul i64 undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i64 undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i64 undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = shl i64 undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i = and i64 undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j = or i64 undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k = xor i64 undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %c = add i64 undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %d = sub i64 undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = mul i64 undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f = ashr i64 undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %g = lshr i64 undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %h = shl i64 undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i = and i64 undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %j = or i64 undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %k = xor i64 undef, undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %c = add i64 undef, undef
@@ -609,12 +609,12 @@ define void @vi8() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-SIZE-LABEL: 'vi8'
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c2 = add <2 x i8> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d2 = sub <2 x i8> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e2 = mul <2 x i8> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f2 = ashr <2 x i8> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g2 = lshr <2 x i8> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h2 = shl <2 x i8> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %c2 = add <2 x i8> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %d2 = sub <2 x i8> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %e2 = mul <2 x i8> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %f2 = ashr <2 x i8> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %g2 = lshr <2 x i8> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %h2 = shl <2 x i8> undef, undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i8> undef, undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i8> undef, undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i8> undef, undef
@@ -922,12 +922,12 @@ define void @vi16() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-SIZE-LABEL: 'vi16'
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c2 = add <2 x i16> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d2 = sub <2 x i16> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e2 = mul <2 x i16> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f2 = ashr <2 x i16> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g2 = lshr <2 x i16> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h2 = shl <2 x i16> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %c2 = add <2 x i16> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %d2 = sub <2 x i16> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %e2 = mul <2 x i16> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %f2 = ashr <2 x i16> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %g2 = lshr <2 x i16> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %h2 = shl <2 x i16> undef, undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i16> undef, undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i16> undef, undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i16> undef, undef
@@ -949,15 +949,15 @@ define void @vi16() {
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = and <8 x i16> undef, undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j8 = or <8 x i16> undef, undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k8 = xor <8 x i16> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c16 = add <16 x i16> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d16 = sub <16 x i16> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e16 = mul <16 x i16> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = ashr <16 x i16> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g16 = lshr <16 x i16> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h16 = shl <16 x i16> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = and <16 x i16> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j16 = or <16 x i16> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k16 = xor <16 x i16> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %c16 = add <16 x i16> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %d16 = sub <16 x i16> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e16 = mul <16 x i16> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f16 = ashr <16 x i16> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %g16 = lshr <16 x i16> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %h16 = shl <16 x i16> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i16 = and <16 x i16> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %j16 = or <16 x i16> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %k16 = xor <16 x i16> undef, undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %c2 = add <2 x i16> undef, undef
@@ -1235,12 +1235,12 @@ define void @vi32() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-SIZE-LABEL: 'vi32'
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c2 = add <2 x i32> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d2 = sub <2 x i32> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e2 = mul <2 x i32> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f2 = ashr <2 x i32> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g2 = lshr <2 x i32> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h2 = shl <2 x i32> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %c2 = add <2 x i32> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %d2 = sub <2 x i32> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %e2 = mul <2 x i32> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %f2 = ashr <2 x i32> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %g2 = lshr <2 x i32> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %h2 = shl <2 x i32> undef, undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i32> undef, undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i32> undef, undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i32> undef, undef
@@ -1253,24 +1253,24 @@ define void @vi32() {
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i4 = and <4 x i32> undef, undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j4 = or <4 x i32> undef, undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k4 = xor <4 x i32> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c8 = add <8 x i32> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d8 = sub <8 x i32> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e8 = mul <8 x i32> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f8 = ashr <8 x i32> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g8 = lshr <8 x i32> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h8 = shl <8 x i32> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = and <8 x i32> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j8 = or <8 x i32> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k8 = xor <8 x i32> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c16 = add <16 x i32> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d16 = sub <16 x i32> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e16 = mul <16 x i32> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = ashr <16 x i32> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g16 = lshr <16 x i32> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h16 = shl <16 x i32> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = and <16 x i32> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j16 = or <16 x i32> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k16 = xor <16 x i32> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %c8 = add <8 x i32> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %d8 = sub <8 x i32> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e8 = mul <8 x i32> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f8 = ashr <8 x i32> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %g8 = lshr <8 x i32> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %h8 = shl <8 x i32> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i8 = and <8 x i32> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %j8 = or <8 x i32> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %k8 = xor <8 x i32> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %c16 = add <16 x i32> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %d16 = sub <16 x i32> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %e16 = mul <16 x i32> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f16 = ashr <16 x i32> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %g16 = lshr <16 x i32> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %h16 = shl <16 x i32> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %i16 = and <16 x i32> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %j16 = or <16 x i32> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %k16 = xor <16 x i32> undef, undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %c2 = add <2 x i32> undef, undef
@@ -1548,42 +1548,42 @@ define void @vi64() {
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-SIZE-LABEL: 'vi64'
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c2 = add <2 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d2 = sub <2 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e2 = mul <2 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f2 = ashr <2 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g2 = lshr <2 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h2 = shl <2 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %c2 = add <2 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %d2 = sub <2 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = mul <2 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f2 = ashr <2 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %g2 = lshr <2 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %h2 = shl <2 x i64> undef, undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i64> undef, undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i64> undef, undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c4 = add <4 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d4 = sub <4 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e4 = mul <4 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f4 = ashr <4 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g4 = lshr <4 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h4 = shl <4 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i4 = and <4 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j4 = or <4 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k4 = xor <4 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c8 = add <8 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d8 = sub <8 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e8 = mul <8 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f8 = ashr <8 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g8 = lshr <8 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h8 = shl <8 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = and <8 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j8 = or <8 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k8 = xor <8 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c16 = add <16 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d16 = sub <16 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e16 = mul <16 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = ashr <16 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g16 = lshr <16 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h16 = shl <16 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = and <16 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j16 = or <16 x i64> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k16 = xor <16 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %c4 = add <4 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %d4 = sub <4 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %e4 = mul <4 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %f4 = ashr <4 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %g4 = lshr <4 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %h4 = shl <4 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i4 = and <4 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %j4 = or <4 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %k4 = xor <4 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %c8 = add <8 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %d8 = sub <8 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %e8 = mul <8 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f8 = ashr <8 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %g8 = lshr <8 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %h8 = shl <8 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %i8 = and <8 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %j8 = or <8 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %k8 = xor <8 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %c16 = add <16 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %d16 = sub <16 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %e16 = mul <16 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %f16 = ashr <16 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %g16 = lshr <16 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %h16 = shl <16 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %i16 = and <16 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %j16 = or <16 x i64> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %k16 = xor <16 x i64> undef, undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %c2 = add <2 x i64> undef, undef

diff  --git a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
index 4001a82dd0a6..40ff13f0d1ce 100644
--- a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
+++ b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
@@ -147,13 +147,13 @@ define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x i32> %vb, <16 x
 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SIZE-LABEL: 'fshl'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 546 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SIZE_LATE-LABEL: 'fshl'
-; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
-; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
+; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
+; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 562 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)

diff  --git a/llvm/test/Analysis/CostModel/ARM/reduce-add.ll b/llvm/test/Analysis/CostModel/ARM/reduce-add.ll
index b3cc0adf7460..94bd13e39df5 100644
--- a/llvm/test/Analysis/CostModel/ARM/reduce-add.ll
+++ b/llvm/test/Analysis/CostModel/ARM/reduce-add.ll
@@ -23,10 +23,10 @@ define i32 @reduce_i64(i32 %arg) {
 ;
 ; V8M-SIZE-LABEL: 'reduce_i64'
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; NEON-SIZE-LABEL: 'reduce_i64'
@@ -68,12 +68,12 @@ define i32 @reduce_i32(i32 %arg) {
 ;
 ; V8M-SIZE-LABEL: 'reduce_i32'
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 133 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 262 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 190 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 382 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; NEON-SIZE-LABEL: 'reduce_i32'

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
index 5f4014fc30fe..bf3a8b786630 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
@@ -641,63 +641,53 @@ define i32 @wrongop(%struct.date* nocapture readonly %pd) {
 ; CHECK-LABEL: wrongop:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    push {r4, lr}
-; CHECK-NEXT:    mov r12, r0
-; CHECK-NEXT:    ldr r0, [r0]
-; CHECK-NEXT:    ldr.w r2, [r12, #8]
-; CHECK-NEXT:    lsls r3, r2, #30
-; CHECK-NEXT:    bne .LBB8_3
-; CHECK-NEXT:  @ %bb.1: @ %entry
-; CHECK-NEXT:    movw r3, #34079
-; CHECK-NEXT:    movt r3, #20971
-; CHECK-NEXT:    smmul r3, r2, r3
-; CHECK-NEXT:    asrs r1, r3, #5
-; CHECK-NEXT:    add.w r1, r1, r3, lsr #31
-; CHECK-NEXT:    movs r3, #100
-; CHECK-NEXT:    mls r1, r1, r3, r2
-; CHECK-NEXT:    cbz r1, .LBB8_3
-; CHECK-NEXT:  @ %bb.2:
-; CHECK-NEXT:    movs r4, #1
-; CHECK-NEXT:    b .LBB8_4
-; CHECK-NEXT:  .LBB8_3: @ %lor.rhs
-; CHECK-NEXT:    movw r1, #47184
+; CHECK-NEXT:    mov r1, r0
+; CHECK-NEXT:    movw r12, #47184
 ; CHECK-NEXT:    movw r3, #23593
-; CHECK-NEXT:    movt r1, #1310
+; CHECK-NEXT:    ldrd r2, lr, [r1, #4]
+; CHECK-NEXT:    movt r12, #1310
 ; CHECK-NEXT:    movt r3, #49807
-; CHECK-NEXT:    mla r1, r2, r3, r1
-; CHECK-NEXT:    movw r2, #55051
-; CHECK-NEXT:    movt r2, #163
-; CHECK-NEXT:    ror.w r1, r1, #4
-; CHECK-NEXT:    cmp r1, r2
-; CHECK-NEXT:    cset r4, lo
-; CHECK-NEXT:  .LBB8_4: @ %lor.end
-; CHECK-NEXT:    ldr.w r1, [r12, #4]
-; CHECK-NEXT:    cmp r1, #1
+; CHECK-NEXT:    mla r3, lr, r3, r12
+; CHECK-NEXT:    movw r1, #55051
+; CHECK-NEXT:    movw r4, #23593
+; CHECK-NEXT:    movt r1, #163
+; CHECK-NEXT:    ldr r0, [r0]
+; CHECK-NEXT:    movt r4, #655
+; CHECK-NEXT:    ror.w r12, r3, #4
+; CHECK-NEXT:    cmp r12, r1
+; CHECK-NEXT:    cset r1, lo
+; CHECK-NEXT:    ror.w r3, r3, #2
+; CHECK-NEXT:    mov.w r12, #1
+; CHECK-NEXT:    cmp r3, r4
+; CHECK-NEXT:    csel r3, r1, r12, lo
+; CHECK-NEXT:    lsls.w r4, lr, #30
+; CHECK-NEXT:    csel r1, r1, r3, ne
+; CHECK-NEXT:    cmp r2, #1
 ; CHECK-NEXT:    it lt
 ; CHECK-NEXT:    poplt {r4, pc}
-; CHECK-NEXT:  .LBB8_5: @ %vector.ph
-; CHECK-NEXT:    adds r3, r1, #3
-; CHECK-NEXT:    movs r2, #1
+; CHECK-NEXT:  .LBB8_1: @ %vector.ph
+; CHECK-NEXT:    adds r3, r2, #3
+; CHECK-NEXT:    movs r4, #52
 ; CHECK-NEXT:    bic r3, r3, #3
 ; CHECK-NEXT:    subs r3, #4
-; CHECK-NEXT:    add.w lr, r2, r3, lsr #2
-; CHECK-NEXT:    movw r2, :lower16:days
-; CHECK-NEXT:    movt r2, :upper16:days
-; CHECK-NEXT:    movs r3, #52
-; CHECK-NEXT:    mla r2, r4, r3, r2
+; CHECK-NEXT:    add.w lr, r12, r3, lsr #2
+; CHECK-NEXT:    movw r3, :lower16:days
+; CHECK-NEXT:    movt r3, :upper16:days
+; CHECK-NEXT:    dls lr, lr
+; CHECK-NEXT:    mla r1, r1, r4, r3
 ; CHECK-NEXT:    movs r3, #0
 ; CHECK-NEXT:    vdup.32 q0, r3
-; CHECK-NEXT:    dls lr, lr
 ; CHECK-NEXT:    vmov.32 q0[0], r0
-; CHECK-NEXT:  .LBB8_6: @ %vector.body
+; CHECK-NEXT:  .LBB8_2: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vctp.32 r1
+; CHECK-NEXT:    vctp.32 r2
 ; CHECK-NEXT:    vmov q1, q0
 ; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vldrwt.u32 q0, [r2], #16
-; CHECK-NEXT:    subs r1, #4
+; CHECK-NEXT:    vldrwt.u32 q0, [r1], #16
+; CHECK-NEXT:    subs r2, #4
 ; CHECK-NEXT:    vadd.i32 q0, q0, q1
-; CHECK-NEXT:    le lr, .LBB8_6
-; CHECK-NEXT:  @ %bb.7: @ %middle.block
+; CHECK-NEXT:    le lr, .LBB8_2
+; CHECK-NEXT:  @ %bb.3: @ %middle.block
 ; CHECK-NEXT:    vpsel q0, q0, q1
 ; CHECK-NEXT:    vaddv.u32 r0, q0
 ; CHECK-NEXT:    pop {r4, pc}

diff  --git a/llvm/test/Transforms/SimplifyCFG/ARM/phi-eliminate.ll b/llvm/test/Transforms/SimplifyCFG/ARM/phi-eliminate.ll
index abf01d3a441e..6d67c590791a 100644
--- a/llvm/test/Transforms/SimplifyCFG/ARM/phi-eliminate.ll
+++ b/llvm/test/Transforms/SimplifyCFG/ARM/phi-eliminate.ll
@@ -233,15 +233,21 @@ define i64 @test_i64(i1 %a, i1 %b, i64 %i, i64 %j, i64 %k) {
 ; CHECK-V8M-TWO-FOLD-4-NEXT:  entry:
 ; CHECK-V8M-TWO-FOLD-4-NEXT:    br i1 [[A:%.*]], label [[M:%.*]], label [[O:%.*]]
 ; CHECK-V8M-TWO-FOLD-4:       O:
+; CHECK-V8M-TWO-FOLD-4-NEXT:    br i1 [[B:%.*]], label [[P:%.*]], label [[Q:%.*]]
+; CHECK-V8M-TWO-FOLD-4:       P:
 ; CHECK-V8M-TWO-FOLD-4-NEXT:    [[IAJ:%.*]] = add i64 [[I:%.*]], [[J:%.*]]
 ; CHECK-V8M-TWO-FOLD-4-NEXT:    [[IAJAK:%.*]] = add i64 [[IAJ]], [[K:%.*]]
+; CHECK-V8M-TWO-FOLD-4-NEXT:    br label [[N:%.*]]
+; CHECK-V8M-TWO-FOLD-4:       Q:
 ; CHECK-V8M-TWO-FOLD-4-NEXT:    [[IXJ:%.*]] = xor i64 [[I]], [[J]]
 ; CHECK-V8M-TWO-FOLD-4-NEXT:    [[IXJXK:%.*]] = xor i64 [[IXJ]], [[K]]
-; CHECK-V8M-TWO-FOLD-4-NEXT:    [[WP:%.*]] = select i1 [[B:%.*]], i64 [[IAJAK]], i64 [[IXJXK]]
+; CHECK-V8M-TWO-FOLD-4-NEXT:    br label [[N]]
+; CHECK-V8M-TWO-FOLD-4:       N:
+; CHECK-V8M-TWO-FOLD-4-NEXT:    [[WP:%.*]] = phi i64 [ [[IAJAK]], [[P]] ], [ [[IXJXK]], [[Q]] ]
 ; CHECK-V8M-TWO-FOLD-4-NEXT:    [[WP2:%.*]] = add i64 [[WP]], [[WP]]
 ; CHECK-V8M-TWO-FOLD-4-NEXT:    br label [[M]]
 ; CHECK-V8M-TWO-FOLD-4:       M:
-; CHECK-V8M-TWO-FOLD-4-NEXT:    [[W:%.*]] = phi i64 [ [[WP2]], [[O]] ], [ 2, [[ENTRY:%.*]] ]
+; CHECK-V8M-TWO-FOLD-4-NEXT:    [[W:%.*]] = phi i64 [ [[WP2]], [[N]] ], [ 2, [[ENTRY:%.*]] ]
 ; CHECK-V8M-TWO-FOLD-4-NEXT:    [[R:%.*]] = add i64 [[W]], 1
 ; CHECK-V8M-TWO-FOLD-4-NEXT:    ret i64 [[R]]
 ;
@@ -265,15 +271,21 @@ define i64 @test_i64(i1 %a, i1 %b, i64 %i, i64 %j, i64 %k) {
 ; CHECK-V8M-TWO-FOLD-5-NEXT:  entry:
 ; CHECK-V8M-TWO-FOLD-5-NEXT:    br i1 [[A:%.*]], label [[M:%.*]], label [[O:%.*]]
 ; CHECK-V8M-TWO-FOLD-5:       O:
+; CHECK-V8M-TWO-FOLD-5-NEXT:    br i1 [[B:%.*]], label [[P:%.*]], label [[Q:%.*]]
+; CHECK-V8M-TWO-FOLD-5:       P:
 ; CHECK-V8M-TWO-FOLD-5-NEXT:    [[IAJ:%.*]] = add i64 [[I:%.*]], [[J:%.*]]
 ; CHECK-V8M-TWO-FOLD-5-NEXT:    [[IAJAK:%.*]] = add i64 [[IAJ]], [[K:%.*]]
+; CHECK-V8M-TWO-FOLD-5-NEXT:    br label [[N:%.*]]
+; CHECK-V8M-TWO-FOLD-5:       Q:
 ; CHECK-V8M-TWO-FOLD-5-NEXT:    [[IXJ:%.*]] = xor i64 [[I]], [[J]]
 ; CHECK-V8M-TWO-FOLD-5-NEXT:    [[IXJXK:%.*]] = xor i64 [[IXJ]], [[K]]
-; CHECK-V8M-TWO-FOLD-5-NEXT:    [[WP:%.*]] = select i1 [[B:%.*]], i64 [[IAJAK]], i64 [[IXJXK]]
+; CHECK-V8M-TWO-FOLD-5-NEXT:    br label [[N]]
+; CHECK-V8M-TWO-FOLD-5:       N:
+; CHECK-V8M-TWO-FOLD-5-NEXT:    [[WP:%.*]] = phi i64 [ [[IAJAK]], [[P]] ], [ [[IXJXK]], [[Q]] ]
 ; CHECK-V8M-TWO-FOLD-5-NEXT:    [[WP2:%.*]] = add i64 [[WP]], [[WP]]
 ; CHECK-V8M-TWO-FOLD-5-NEXT:    br label [[M]]
 ; CHECK-V8M-TWO-FOLD-5:       M:
-; CHECK-V8M-TWO-FOLD-5-NEXT:    [[W:%.*]] = phi i64 [ [[WP2]], [[O]] ], [ 2, [[ENTRY:%.*]] ]
+; CHECK-V8M-TWO-FOLD-5-NEXT:    [[W:%.*]] = phi i64 [ [[WP2]], [[N]] ], [ 2, [[ENTRY:%.*]] ]
 ; CHECK-V8M-TWO-FOLD-5-NEXT:    [[R:%.*]] = add i64 [[W]], 1
 ; CHECK-V8M-TWO-FOLD-5-NEXT:    ret i64 [[R]]
 ;
@@ -295,13 +307,23 @@ define i64 @test_i64(i1 %a, i1 %b, i64 %i, i64 %j, i64 %k) {
 ;
 ; CHECK-V8M-TWO-FOLD-6-LABEL: @test_i64(
 ; CHECK-V8M-TWO-FOLD-6-NEXT:  entry:
+; CHECK-V8M-TWO-FOLD-6-NEXT:    br i1 [[A:%.*]], label [[M:%.*]], label [[O:%.*]]
+; CHECK-V8M-TWO-FOLD-6:       O:
+; CHECK-V8M-TWO-FOLD-6-NEXT:    br i1 [[B:%.*]], label [[P:%.*]], label [[Q:%.*]]
+; CHECK-V8M-TWO-FOLD-6:       P:
 ; CHECK-V8M-TWO-FOLD-6-NEXT:    [[IAJ:%.*]] = add i64 [[I:%.*]], [[J:%.*]]
 ; CHECK-V8M-TWO-FOLD-6-NEXT:    [[IAJAK:%.*]] = add i64 [[IAJ]], [[K:%.*]]
+; CHECK-V8M-TWO-FOLD-6-NEXT:    br label [[N:%.*]]
+; CHECK-V8M-TWO-FOLD-6:       Q:
 ; CHECK-V8M-TWO-FOLD-6-NEXT:    [[IXJ:%.*]] = xor i64 [[I]], [[J]]
 ; CHECK-V8M-TWO-FOLD-6-NEXT:    [[IXJXK:%.*]] = xor i64 [[IXJ]], [[K]]
-; CHECK-V8M-TWO-FOLD-6-NEXT:    [[WP:%.*]] = select i1 [[B:%.*]], i64 [[IAJAK]], i64 [[IXJXK]]
+; CHECK-V8M-TWO-FOLD-6-NEXT:    br label [[N]]
+; CHECK-V8M-TWO-FOLD-6:       N:
+; CHECK-V8M-TWO-FOLD-6-NEXT:    [[WP:%.*]] = phi i64 [ [[IAJAK]], [[P]] ], [ [[IXJXK]], [[Q]] ]
 ; CHECK-V8M-TWO-FOLD-6-NEXT:    [[WP2:%.*]] = add i64 [[WP]], [[WP]]
-; CHECK-V8M-TWO-FOLD-6-NEXT:    [[W:%.*]] = select i1 [[A:%.*]], i64 2, i64 [[WP2]]
+; CHECK-V8M-TWO-FOLD-6-NEXT:    br label [[M]]
+; CHECK-V8M-TWO-FOLD-6:       M:
+; CHECK-V8M-TWO-FOLD-6-NEXT:    [[W:%.*]] = phi i64 [ [[WP2]], [[N]] ], [ 2, [[ENTRY:%.*]] ]
 ; CHECK-V8M-TWO-FOLD-6-NEXT:    [[R:%.*]] = add i64 [[W]], 1
 ; CHECK-V8M-TWO-FOLD-6-NEXT:    ret i64 [[R]]
 ;
@@ -344,15 +366,21 @@ define i64 @test_i64_minsize(i1 %a, i1 %b, i64 %i, i64 %j, i64 %k) #0 {
 ; CHECK-V8M-TWO-FOLD-4-NEXT:  entry:
 ; CHECK-V8M-TWO-FOLD-4-NEXT:    br i1 [[A:%.*]], label [[M:%.*]], label [[O:%.*]]
 ; CHECK-V8M-TWO-FOLD-4:       O:
+; CHECK-V8M-TWO-FOLD-4-NEXT:    br i1 [[B:%.*]], label [[P:%.*]], label [[Q:%.*]]
+; CHECK-V8M-TWO-FOLD-4:       P:
 ; CHECK-V8M-TWO-FOLD-4-NEXT:    [[IAJ:%.*]] = add i64 [[I:%.*]], [[J:%.*]]
 ; CHECK-V8M-TWO-FOLD-4-NEXT:    [[IAJAK:%.*]] = add i64 [[IAJ]], [[K:%.*]]
+; CHECK-V8M-TWO-FOLD-4-NEXT:    br label [[N:%.*]]
+; CHECK-V8M-TWO-FOLD-4:       Q:
 ; CHECK-V8M-TWO-FOLD-4-NEXT:    [[IXJ:%.*]] = xor i64 [[I]], [[J]]
 ; CHECK-V8M-TWO-FOLD-4-NEXT:    [[IXJXK:%.*]] = xor i64 [[IXJ]], [[K]]
-; CHECK-V8M-TWO-FOLD-4-NEXT:    [[WP:%.*]] = select i1 [[B:%.*]], i64 [[IAJAK]], i64 [[IXJXK]]
+; CHECK-V8M-TWO-FOLD-4-NEXT:    br label [[N]]
+; CHECK-V8M-TWO-FOLD-4:       N:
+; CHECK-V8M-TWO-FOLD-4-NEXT:    [[WP:%.*]] = phi i64 [ [[IAJAK]], [[P]] ], [ [[IXJXK]], [[Q]] ]
 ; CHECK-V8M-TWO-FOLD-4-NEXT:    [[WP2:%.*]] = add i64 [[WP]], [[WP]]
 ; CHECK-V8M-TWO-FOLD-4-NEXT:    br label [[M]]
 ; CHECK-V8M-TWO-FOLD-4:       M:
-; CHECK-V8M-TWO-FOLD-4-NEXT:    [[W:%.*]] = phi i64 [ [[WP2]], [[O]] ], [ 2, [[ENTRY:%.*]] ]
+; CHECK-V8M-TWO-FOLD-4-NEXT:    [[W:%.*]] = phi i64 [ [[WP2]], [[N]] ], [ 2, [[ENTRY:%.*]] ]
 ; CHECK-V8M-TWO-FOLD-4-NEXT:    [[R:%.*]] = add i64 [[W]], 1
 ; CHECK-V8M-TWO-FOLD-4-NEXT:    ret i64 [[R]]
 ;
@@ -376,15 +404,21 @@ define i64 @test_i64_minsize(i1 %a, i1 %b, i64 %i, i64 %j, i64 %k) #0 {
 ; CHECK-V8M-TWO-FOLD-5-NEXT:  entry:
 ; CHECK-V8M-TWO-FOLD-5-NEXT:    br i1 [[A:%.*]], label [[M:%.*]], label [[O:%.*]]
 ; CHECK-V8M-TWO-FOLD-5:       O:
+; CHECK-V8M-TWO-FOLD-5-NEXT:    br i1 [[B:%.*]], label [[P:%.*]], label [[Q:%.*]]
+; CHECK-V8M-TWO-FOLD-5:       P:
 ; CHECK-V8M-TWO-FOLD-5-NEXT:    [[IAJ:%.*]] = add i64 [[I:%.*]], [[J:%.*]]
 ; CHECK-V8M-TWO-FOLD-5-NEXT:    [[IAJAK:%.*]] = add i64 [[IAJ]], [[K:%.*]]
+; CHECK-V8M-TWO-FOLD-5-NEXT:    br label [[N:%.*]]
+; CHECK-V8M-TWO-FOLD-5:       Q:
 ; CHECK-V8M-TWO-FOLD-5-NEXT:    [[IXJ:%.*]] = xor i64 [[I]], [[J]]
 ; CHECK-V8M-TWO-FOLD-5-NEXT:    [[IXJXK:%.*]] = xor i64 [[IXJ]], [[K]]
-; CHECK-V8M-TWO-FOLD-5-NEXT:    [[WP:%.*]] = select i1 [[B:%.*]], i64 [[IAJAK]], i64 [[IXJXK]]
+; CHECK-V8M-TWO-FOLD-5-NEXT:    br label [[N]]
+; CHECK-V8M-TWO-FOLD-5:       N:
+; CHECK-V8M-TWO-FOLD-5-NEXT:    [[WP:%.*]] = phi i64 [ [[IAJAK]], [[P]] ], [ [[IXJXK]], [[Q]] ]
 ; CHECK-V8M-TWO-FOLD-5-NEXT:    [[WP2:%.*]] = add i64 [[WP]], [[WP]]
 ; CHECK-V8M-TWO-FOLD-5-NEXT:    br label [[M]]
 ; CHECK-V8M-TWO-FOLD-5:       M:
-; CHECK-V8M-TWO-FOLD-5-NEXT:    [[W:%.*]] = phi i64 [ [[WP2]], [[O]] ], [ 2, [[ENTRY:%.*]] ]
+; CHECK-V8M-TWO-FOLD-5-NEXT:    [[W:%.*]] = phi i64 [ [[WP2]], [[N]] ], [ 2, [[ENTRY:%.*]] ]
 ; CHECK-V8M-TWO-FOLD-5-NEXT:    [[R:%.*]] = add i64 [[W]], 1
 ; CHECK-V8M-TWO-FOLD-5-NEXT:    ret i64 [[R]]
 ;
@@ -406,13 +440,23 @@ define i64 @test_i64_minsize(i1 %a, i1 %b, i64 %i, i64 %j, i64 %k) #0 {
 ;
 ; CHECK-V8M-TWO-FOLD-6-LABEL: @test_i64_minsize(
 ; CHECK-V8M-TWO-FOLD-6-NEXT:  entry:
+; CHECK-V8M-TWO-FOLD-6-NEXT:    br i1 [[A:%.*]], label [[M:%.*]], label [[O:%.*]]
+; CHECK-V8M-TWO-FOLD-6:       O:
+; CHECK-V8M-TWO-FOLD-6-NEXT:    br i1 [[B:%.*]], label [[P:%.*]], label [[Q:%.*]]
+; CHECK-V8M-TWO-FOLD-6:       P:
 ; CHECK-V8M-TWO-FOLD-6-NEXT:    [[IAJ:%.*]] = add i64 [[I:%.*]], [[J:%.*]]
 ; CHECK-V8M-TWO-FOLD-6-NEXT:    [[IAJAK:%.*]] = add i64 [[IAJ]], [[K:%.*]]
+; CHECK-V8M-TWO-FOLD-6-NEXT:    br label [[N:%.*]]
+; CHECK-V8M-TWO-FOLD-6:       Q:
 ; CHECK-V8M-TWO-FOLD-6-NEXT:    [[IXJ:%.*]] = xor i64 [[I]], [[J]]
 ; CHECK-V8M-TWO-FOLD-6-NEXT:    [[IXJXK:%.*]] = xor i64 [[IXJ]], [[K]]
-; CHECK-V8M-TWO-FOLD-6-NEXT:    [[WP:%.*]] = select i1 [[B:%.*]], i64 [[IAJAK]], i64 [[IXJXK]]
+; CHECK-V8M-TWO-FOLD-6-NEXT:    br label [[N]]
+; CHECK-V8M-TWO-FOLD-6:       N:
+; CHECK-V8M-TWO-FOLD-6-NEXT:    [[WP:%.*]] = phi i64 [ [[IAJAK]], [[P]] ], [ [[IXJXK]], [[Q]] ]
 ; CHECK-V8M-TWO-FOLD-6-NEXT:    [[WP2:%.*]] = add i64 [[WP]], [[WP]]
-; CHECK-V8M-TWO-FOLD-6-NEXT:    [[W:%.*]] = select i1 [[A:%.*]], i64 2, i64 [[WP2]]
+; CHECK-V8M-TWO-FOLD-6-NEXT:    br label [[M]]
+; CHECK-V8M-TWO-FOLD-6:       M:
+; CHECK-V8M-TWO-FOLD-6-NEXT:    [[W:%.*]] = phi i64 [ [[WP2]], [[N]] ], [ 2, [[ENTRY:%.*]] ]
 ; CHECK-V8M-TWO-FOLD-6-NEXT:    [[R:%.*]] = add i64 [[W]], 1
 ; CHECK-V8M-TWO-FOLD-6-NEXT:    ret i64 [[R]]
 ;