[llvm] 649cf45 - [AArch64] Expand the SVE min/max reduction costs to NEON

David Green via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 5 15:23:33 PDT 2021


Author: David Green
Date: 2021-08-05T23:23:24+01:00
New Revision: 649cf4514dd32e0e7944f6f8d82a1b72c87a2870

URL: https://github.com/llvm/llvm-project/commit/649cf4514dd32e0e7944f6f8d82a1b72c87a2870
DIFF: https://github.com/llvm/llvm-project/commit/649cf4514dd32e0e7944f6f8d82a1b72c87a2870.diff

LOG: [AArch64] Expand the SVE min/max reduction costs to NEON

This takes the existing SVE costing for the various min/max reduction
intrinsics and expands it to NEON, where I believe it applies equally
well.

In the process it changes the legalization cost to use the cost of the
min/max intrinsic, as opposed to summing up the costs of a compare
(ICmp/FCmp) and a Select.

Differential Revision: https://reviews.llvm.org/D106239

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
    llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 6c3dd53819f94..063c37dcb8b17 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -14,6 +14,7 @@
 #include "llvm/CodeGen/BasicTTIImpl.h"
 #include "llvm/CodeGen/CostTable.h"
 #include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/IntrinsicsAArch64.h"
 #include "llvm/IR/PatternMatch.h"
@@ -1899,23 +1900,23 @@ InstructionCost
 AArch64TTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
                                        bool IsUnsigned,
                                        TTI::TargetCostKind CostKind) {
-  if (!isa<ScalableVectorType>(Ty))
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+
+  if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
     return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
-  assert((isa<ScalableVectorType>(Ty) && isa<ScalableVectorType>(CondTy)) &&
-         "Both vector needs to be scalable");
 
-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+  assert((isa<ScalableVectorType>(Ty) == isa<ScalableVectorType>(CondTy)) &&
+         "Both vector needs to be equally scalable");
+
   InstructionCost LegalizationCost = 0;
   if (LT.first > 1) {
     Type *LegalVTy = EVT(LT.second).getTypeForEVT(Ty->getContext());
-    unsigned CmpOpcode =
-        Ty->isFPOrFPVectorTy() ? Instruction::FCmp : Instruction::ICmp;
-    LegalizationCost =
-        getCmpSelInstrCost(CmpOpcode, LegalVTy, LegalVTy,
-                           CmpInst::BAD_ICMP_PREDICATE, CostKind) +
-        getCmpSelInstrCost(Instruction::Select, LegalVTy, LegalVTy,
-                           CmpInst::BAD_ICMP_PREDICATE, CostKind);
-    LegalizationCost *= LT.first - 1;
+    unsigned MinMaxOpcode =
+        Ty->isFPOrFPVectorTy()
+            ? Intrinsic::maxnum
+            : (IsUnsigned ? Intrinsic::umin : Intrinsic::smin);
+    IntrinsicCostAttributes Attrs(MinMaxOpcode, LegalVTy, {LegalVTy, LegalVTy});
+    LegalizationCost = getIntrinsicInstrCost(Attrs, CostKind) * (LT.first - 1);
   }
 
   return LegalizationCost + /*Cost of horizontal reduction*/ 2;

diff  --git a/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
index 0f1bddd7325cd..59c0947b8ee3c 100644
--- a/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
@@ -1,24 +1,25 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -cost-model -cost-kind=throughput -analyze | FileCheck %s
+; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -cost-model -cost-kind=throughput -analyze | FileCheck %s --check-prefixes=CHECK,CHECK-NOF16
+; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+fullfp16 -cost-model -cost-kind=throughput -analyze | FileCheck %s --check-prefixes=CHECK,CHECK-F16
 
 define void @reduce_umin() {
 ; CHECK-LABEL: 'reduce_umin'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V3i8 = call i8 @llvm.vector.reduce.umin.v3i8(<3 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i8 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V8i8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 280 for instruction: %V16i8 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 432 for instruction: %V32i8 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 736 for instruction: %V64i8 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i16 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V8i16 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 186 for instruction: %V16i16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V8i32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i64 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4i64 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1i8 = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V3i8 = call i8 @llvm.vector.reduce.umin.v3i8(<3 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %V1i8 = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef)
@@ -42,22 +43,22 @@ define void @reduce_umin() {
 
 define void @reduce_umax() {
 ; CHECK-LABEL: 'reduce_umax'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V3i8 = call i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i8 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V8i8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 280 for instruction: %V16i8 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 432 for instruction: %V32i8 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 736 for instruction: %V64i8 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i16 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V8i16 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 186 for instruction: %V16i16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V8i32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i64 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4i64 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1i8 = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V3i8 = call i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %V1i8 = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef)
@@ -81,22 +82,22 @@ define void @reduce_umax() {
 
 define void @reduce_smin() {
 ; CHECK-LABEL: 'reduce_smin'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V3i8 = call i8 @llvm.vector.reduce.smin.v3i8(<3 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i8 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V8i8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 280 for instruction: %V16i8 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 432 for instruction: %V32i8 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 736 for instruction: %V64i8 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i16 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V8i16 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 186 for instruction: %V16i16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V8i32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i64 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4i64 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1i8 = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V3i8 = call i8 @llvm.vector.reduce.smin.v3i8(<3 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %V1i8 = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef)
@@ -120,22 +121,22 @@ define void @reduce_smin() {
 
 define void @reduce_smax() {
 ; CHECK-LABEL: 'reduce_smax'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V3i8 = call i8 @llvm.vector.reduce.smax.v3i8(<3 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i8 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V8i8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 280 for instruction: %V16i8 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 432 for instruction: %V32i8 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 736 for instruction: %V64i8 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i16 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V8i16 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 186 for instruction: %V16i16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V8i32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i64 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4i64 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1i8 = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V3i8 = call i8 @llvm.vector.reduce.smax.v3i8(<3 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %V1i8 = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef)
@@ -157,23 +158,59 @@ define void @reduce_smax() {
   ret void
 }
 
-define void @reduce_fmin() {
-; CHECK-LABEL: 'reduce_fmin'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 298 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+define void @reduce_fmin16() {
+; CHECK-NOF16-LABEL: 'reduce_fmin16'
+; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
+; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
+; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
+; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 298 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
+; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-F16-LABEL: 'reduce_fmin16'
+; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
+; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
+; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
+; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
+; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
   %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
   %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
   %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
+  ret void
+}
+
+define void @reduce_fmax16() {
+; CHECK-NOF16-LABEL: 'reduce_fmax16'
+; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
+; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
+; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
+; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 298 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
+; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-F16-LABEL: 'reduce_fmax16'
+; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
+; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
+; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
+; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
+; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
+  %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
+  %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
+  %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
+  ret void
+}
+
+define void @reduce_fmin() {
+; CHECK-LABEL: 'reduce_fmin'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   %V2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
   %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
   %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
@@ -184,21 +221,13 @@ define void @reduce_fmin() {
 
 define void @reduce_fmax() {
 ; CHECK-LABEL: 'reduce_fmax'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 298 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
-  %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
-  %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
-  %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
   %V2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
   %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
   %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll b/llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll
index ce743526ee565..2734f8f6fcfe7 100644
--- a/llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll
@@ -48,7 +48,7 @@ define i32 @add.i32.v4i32(<4 x i32> %v) {
 
 define i8 @umin.i8.v8i8(<8 x i8> %v) {
 ; COST-LABEL: 'umin.i8.v8i8'
-; COST-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %r = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
 ;
   %r = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %v)
@@ -57,7 +57,7 @@ define i8 @umin.i8.v8i8(<8 x i8> %v) {
 
 define i8 @umin.i8.v16i8(<16 x i8> %v) {
 ; COST-LABEL: 'umin.i8.v16i8'
-; COST-NEXT:  Cost Model: Found an estimated cost of 280 for instruction: %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
 ;
   %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %v)
@@ -66,7 +66,7 @@ define i8 @umin.i8.v16i8(<16 x i8> %v) {
 
 define i16 @umin.i16.v4i16(<4 x i16> %v) {
 ; COST-LABEL: 'umin.i16.v4i16'
-; COST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %r = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
 ;
   %r = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %v)
@@ -75,7 +75,7 @@ define i16 @umin.i16.v4i16(<4 x i16> %v) {
 
 define i16 @umin.i16.v8i16(<8 x i16> %v) {
 ; COST-LABEL: 'umin.i16.v8i16'
-; COST-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
 ;
   %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %v)
@@ -84,7 +84,7 @@ define i16 @umin.i16.v8i16(<8 x i16> %v) {
 
 define i32 @umin.i32.v4i32(<4 x i32> %v) {
 ; COST-LABEL: 'umin.i32.v4i32'
-; COST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
 ;
   %r = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %v)
@@ -93,7 +93,7 @@ define i32 @umin.i32.v4i32(<4 x i32> %v) {
 
 define i8 @umax.i8.v8i8(<8 x i8> %v) {
 ; COST-LABEL: 'umax.i8.v8i8'
-; COST-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %r = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
 ;
   %r = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %v)
@@ -102,7 +102,7 @@ define i8 @umax.i8.v8i8(<8 x i8> %v) {
 
 define i8 @umax.i8.v16i8(<16 x i8> %v) {
 ; COST-LABEL: 'umax.i8.v16i8'
-; COST-NEXT:  Cost Model: Found an estimated cost of 280 for instruction: %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
 ;
   %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %v)
@@ -111,7 +111,7 @@ define i8 @umax.i8.v16i8(<16 x i8> %v) {
 
 define i16 @umax.i16.v4i16(<4 x i16> %v) {
 ; COST-LABEL: 'umax.i16.v4i16'
-; COST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %r = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
 ;
   %r = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %v)
@@ -120,7 +120,7 @@ define i16 @umax.i16.v4i16(<4 x i16> %v) {
 
 define i16 @umax.i16.v8i16(<8 x i16> %v) {
 ; COST-LABEL: 'umax.i16.v8i16'
-; COST-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
 ;
   %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %v)
@@ -129,7 +129,7 @@ define i16 @umax.i16.v8i16(<8 x i16> %v) {
 
 define i32 @umax.i32.v4i32(<4 x i32> %v) {
 ; COST-LABEL: 'umax.i32.v4i32'
-; COST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
 ;
   %r = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %v)
@@ -138,7 +138,7 @@ define i32 @umax.i32.v4i32(<4 x i32> %v) {
 
 define i8 @smin.i8.v8i8(<8 x i8> %v) {
 ; COST-LABEL: 'smin.i8.v8i8'
-; COST-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %r = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
 ;
   %r = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %v)
@@ -147,7 +147,7 @@ define i8 @smin.i8.v8i8(<8 x i8> %v) {
 
 define i8 @smin.i8.v16i8(<16 x i8> %v) {
 ; COST-LABEL: 'smin.i8.v16i8'
-; COST-NEXT:  Cost Model: Found an estimated cost of 280 for instruction: %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
 ;
   %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %v)
@@ -156,7 +156,7 @@ define i8 @smin.i8.v16i8(<16 x i8> %v) {
 
 define i16 @smin.i16.v4i16(<4 x i16> %v) {
 ; COST-LABEL: 'smin.i16.v4i16'
-; COST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %r = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
 ;
   %r = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %v)
@@ -165,7 +165,7 @@ define i16 @smin.i16.v4i16(<4 x i16> %v) {
 
 define i16 @smin.i16.v8i16(<8 x i16> %v) {
 ; COST-LABEL: 'smin.i16.v8i16'
-; COST-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
 ;
   %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %v)
@@ -174,7 +174,7 @@ define i16 @smin.i16.v8i16(<8 x i16> %v) {
 
 define i32 @smin.i32.v4i32(<4 x i32> %v) {
 ; COST-LABEL: 'smin.i32.v4i32'
-; COST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
 ;
   %r = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %v)
@@ -183,7 +183,7 @@ define i32 @smin.i32.v4i32(<4 x i32> %v) {
 
 define i8 @smax.i8.v8i8(<8 x i8> %v) {
 ; COST-LABEL: 'smax.i8.v8i8'
-; COST-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %r = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
 ;
   %r = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %v)
@@ -192,7 +192,7 @@ define i8 @smax.i8.v8i8(<8 x i8> %v) {
 
 define i8 @smax.i8.v16i8(<16 x i8> %v) {
 ; COST-LABEL: 'smax.i8.v16i8'
-; COST-NEXT:  Cost Model: Found an estimated cost of 280 for instruction: %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
 ;
   %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %v)
@@ -201,7 +201,7 @@ define i8 @smax.i8.v16i8(<16 x i8> %v) {
 
 define i16 @smax.i16.v4i16(<4 x i16> %v) {
 ; COST-LABEL: 'smax.i16.v4i16'
-; COST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %r = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
 ;
   %r = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %v)
@@ -210,7 +210,7 @@ define i16 @smax.i16.v4i16(<4 x i16> %v) {
 
 define i16 @smax.i16.v8i16(<8 x i16> %v) {
 ; COST-LABEL: 'smax.i16.v8i16'
-; COST-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
 ;
   %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %v)
@@ -219,7 +219,7 @@ define i16 @smax.i16.v8i16(<8 x i16> %v) {
 
 define i32 @smax.i32.v4i32(<4 x i32> %v) {
 ; COST-LABEL: 'smax.i32.v4i32'
-; COST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
 ;
   %r = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %v)
@@ -228,7 +228,7 @@ define i32 @smax.i32.v4i32(<4 x i32> %v) {
 
 define float @fmin.f32.v4f32(<4 x float> %v) {
 ; COST-LABEL: 'fmin.f32.v4f32'
-; COST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %r = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %r
 ;
   %r = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
@@ -237,7 +237,7 @@ define float @fmin.f32.v4f32(<4 x float> %v) {
 
 define float @fmax.f32.v4f32(<4 x float> %v) {
 ; COST-LABEL: 'fmax.f32.v4f32'
-; COST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %r = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %r
 ;
   %r = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v)


        


More information about the llvm-commits mailing list