[llvm] ed9cb66 - [TTI][AArch64] Add basic vector_reduce_fmaximum/vector_reduce_fminimum costmodelling

Tue Jul 4 10:13:15 PDT 2023

Author: David Green
Date: 2023-07-04T18:13:10+01:00
New Revision: ed9cb663765c3f8b5af22dd6a8af1715a20d4626

URL: https://github.com/llvm/llvm-project/commit/ed9cb663765c3f8b5af22dd6a8af1715a20d4626
DIFF: https://github.com/llvm/llvm-project/commit/ed9cb663765c3f8b5af22dd6a8af1715a20d4626.diff

LOG: [TTI][AArch64] Add basic vector_reduce_fmaximum/vector_reduce_fminimum costmodelling

This adds some basic handling in TargetTransformInfo to treat
vector_reduce_fminimum/vector_reduce_fmaximum similar to
vector_reduce_fmax/vector_reduce_fmax, getting better costs via
getMinMaxReductionCost.

Differential Revision: https://reviews.llvm.org/D153548

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/BasicTTIImpl.h
    llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index d8ca03ae2c33c0..383fdd1f4d79d0 100644

--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1602,6 +1602,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     case Intrinsic::vector_reduce_smin:
     case Intrinsic::vector_reduce_fmax:
     case Intrinsic::vector_reduce_fmin:
+    case Intrinsic::vector_reduce_fmaximum:
+    case Intrinsic::vector_reduce_fminimum:
     case Intrinsic::vector_reduce_umax:
     case Intrinsic::vector_reduce_umin: {
       IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, I, 1);
@@ -1907,6 +1909,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     case Intrinsic::vector_reduce_fmin:
       return thisT()->getMinMaxReductionCost(Intrinsic::minnum, VecOpTy,
                                              ICA.getFlags(), CostKind);
+    case Intrinsic::vector_reduce_fmaximum:
+      return thisT()->getMinMaxReductionCost(Intrinsic::maximum, VecOpTy,
+                                             ICA.getFlags(), CostKind);
+    case Intrinsic::vector_reduce_fminimum:
+      return thisT()->getMinMaxReductionCost(Intrinsic::minimum, VecOpTy,
+                                             ICA.getFlags(), CostKind);
     case Intrinsic::abs: {
       // abs(X) = select(icmp(X,0),X,sub(0,X))
       Type *CondTy = RetTy->getWithNewBitWidth(1);

diff  --git a/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
index 95b1e99564291b..f5a0d198f65022 100644
--- a/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
@@ -166,10 +166,10 @@ define void @reduce_fmin16() {
 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 237 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 350 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2f16m = call half @llvm.vector.reduce.fminimum.v2f16(<2 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4f16m = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V8f16m = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V16f16m = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> undef)
+; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2f16m = call half @llvm.vector.reduce.fminimum.v2f16(<2 x half> undef)
+; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V4f16m = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> undef)
+; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 237 for instruction: %V8f16m = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> undef)
+; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 350 for instruction: %V16f16m = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> undef)
 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-F16-LABEL: 'reduce_fmin16'
@@ -177,10 +177,10 @@ define void @reduce_fmin16() {
 ; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
 ; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
 ; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2f16m = call half @llvm.vector.reduce.fminimum.v2f16(<2 x half> undef)
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4f16m = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> undef)
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V8f16m = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> undef)
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V16f16m = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> undef)
+; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f16m = call half @llvm.vector.reduce.fminimum.v2f16(<2 x half> undef)
+; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f16m = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> undef)
+; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8f16m = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> undef)
+; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16f16m = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> undef)
 ; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
@@ -200,10 +200,10 @@ define void @reduce_fmax16() {
 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 237 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 350 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2f16m = call half @llvm.vector.reduce.fmaximum.v2f16(<2 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4f16m = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V8f16m = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V16f16m = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> undef)
+; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2f16m = call half @llvm.vector.reduce.fmaximum.v2f16(<2 x half> undef)
+; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V4f16m = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> undef)
+; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 237 for instruction: %V8f16m = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> undef)
+; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 350 for instruction: %V16f16m = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> undef)
 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-F16-LABEL: 'reduce_fmax16'
@@ -211,10 +211,10 @@ define void @reduce_fmax16() {
 ; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
 ; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
 ; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2f16m = call half @llvm.vector.reduce.fmaximum.v2f16(<2 x half> undef)
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4f16m = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> undef)
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V8f16m = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> undef)
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V16f16m = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> undef)
+; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f16m = call half @llvm.vector.reduce.fmaximum.v2f16(<2 x half> undef)
+; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f16m = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> undef)
+; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8f16m = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> undef)
+; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16f16m = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> undef)
 ; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
@@ -235,11 +235,11 @@ define void @reduce_fmin() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2f32m = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4f32m = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V8f32m = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2f64m = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4f64m = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f32m = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f32m = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8f32m = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f64m = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f64m = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %V2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
@@ -262,11 +262,11 @@ define void @reduce_fmax() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2f32m = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4f32m = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V8f32m = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2f64m = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4f64m = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f32m = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f32m = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8f32m = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f64m = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f64m = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %V2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)