[llvm] ed9cb66 - [TTI][AArch64] Add basic vector_reduce_fmaximum/vector_reduce_fminimum costmodelling
David Green via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 4 10:13:15 PDT 2023
Author: David Green
Date: 2023-07-04T18:13:10+01:00
New Revision: ed9cb663765c3f8b5af22dd6a8af1715a20d4626
URL: https://github.com/llvm/llvm-project/commit/ed9cb663765c3f8b5af22dd6a8af1715a20d4626
DIFF: https://github.com/llvm/llvm-project/commit/ed9cb663765c3f8b5af22dd6a8af1715a20d4626.diff
LOG: [TTI][AArch64] Add basic vector_reduce_fmaximum/vector_reduce_fminimum costmodelling
This adds some basic handling in TargetTransformInfo to treat
vector_reduce_fminimum/vector_reduce_fmaximum similar to
vector_reduce_fmax/vector_reduce_fmax, getting better costs via
getMinMaxReductionCost.
Differential Revision: https://reviews.llvm.org/D153548
Added:
Modified:
llvm/include/llvm/CodeGen/BasicTTIImpl.h
llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index d8ca03ae2c33c0..383fdd1f4d79d0 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1602,6 +1602,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
case Intrinsic::vector_reduce_smin:
case Intrinsic::vector_reduce_fmax:
case Intrinsic::vector_reduce_fmin:
+ case Intrinsic::vector_reduce_fmaximum:
+ case Intrinsic::vector_reduce_fminimum:
case Intrinsic::vector_reduce_umax:
case Intrinsic::vector_reduce_umin: {
IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, I, 1);
@@ -1907,6 +1909,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
case Intrinsic::vector_reduce_fmin:
return thisT()->getMinMaxReductionCost(Intrinsic::minnum, VecOpTy,
ICA.getFlags(), CostKind);
+ case Intrinsic::vector_reduce_fmaximum:
+ return thisT()->getMinMaxReductionCost(Intrinsic::maximum, VecOpTy,
+ ICA.getFlags(), CostKind);
+ case Intrinsic::vector_reduce_fminimum:
+ return thisT()->getMinMaxReductionCost(Intrinsic::minimum, VecOpTy,
+ ICA.getFlags(), CostKind);
case Intrinsic::abs: {
// abs(X) = select(icmp(X,0),X,sub(0,X))
Type *CondTy = RetTy->getWithNewBitWidth(1);
diff --git a/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
index 95b1e99564291b..f5a0d198f65022 100644
--- a/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
@@ -166,10 +166,10 @@ define void @reduce_fmin16() {
; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 237 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
-; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2f16m = call half @llvm.vector.reduce.fminimum.v2f16(<2 x half> undef)
-; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4f16m = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> undef)
-; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8f16m = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> undef)
-; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16f16m = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2f16m = call half @llvm.vector.reduce.fminimum.v2f16(<2 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V4f16m = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 237 for instruction: %V8f16m = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V16f16m = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> undef)
; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-F16-LABEL: 'reduce_fmin16'
@@ -177,10 +177,10 @@ define void @reduce_fmin16() {
; CHECK-F16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
; CHECK-F16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
; CHECK-F16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
-; CHECK-F16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2f16m = call half @llvm.vector.reduce.fminimum.v2f16(<2 x half> undef)
-; CHECK-F16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4f16m = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> undef)
-; CHECK-F16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8f16m = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> undef)
-; CHECK-F16-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16f16m = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> undef)
+; CHECK-F16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2f16m = call half @llvm.vector.reduce.fminimum.v2f16(<2 x half> undef)
+; CHECK-F16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4f16m = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> undef)
+; CHECK-F16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8f16m = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> undef)
+; CHECK-F16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16f16m = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> undef)
; CHECK-F16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
@@ -200,10 +200,10 @@ define void @reduce_fmax16() {
; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 237 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
-; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2f16m = call half @llvm.vector.reduce.fmaximum.v2f16(<2 x half> undef)
-; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4f16m = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> undef)
-; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8f16m = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> undef)
-; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16f16m = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2f16m = call half @llvm.vector.reduce.fmaximum.v2f16(<2 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V4f16m = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 237 for instruction: %V8f16m = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V16f16m = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> undef)
; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-F16-LABEL: 'reduce_fmax16'
@@ -211,10 +211,10 @@ define void @reduce_fmax16() {
; CHECK-F16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
; CHECK-F16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
; CHECK-F16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
-; CHECK-F16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2f16m = call half @llvm.vector.reduce.fmaximum.v2f16(<2 x half> undef)
-; CHECK-F16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4f16m = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> undef)
-; CHECK-F16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8f16m = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> undef)
-; CHECK-F16-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16f16m = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> undef)
+; CHECK-F16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2f16m = call half @llvm.vector.reduce.fmaximum.v2f16(<2 x half> undef)
+; CHECK-F16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4f16m = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> undef)
+; CHECK-F16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8f16m = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> undef)
+; CHECK-F16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16f16m = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> undef)
; CHECK-F16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
@@ -235,11 +235,11 @@ define void @reduce_fmin() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2f32m = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4f32m = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8f32m = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2f64m = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4f64m = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2f32m = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4f32m = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8f32m = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2f64m = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4f64m = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%V2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
@@ -262,11 +262,11 @@ define void @reduce_fmax() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2f32m = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4f32m = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8f32m = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2f64m = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4f64m = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2f32m = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4f32m = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8f32m = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2f64m = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4f64m = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%V2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
More information about the llvm-commits
mailing list