[llvm] [RISCV][CostModel] Estimate cost of llvm.vector.reduce.fmaximum/fminimum (PR #80697)
Shih-Po Hung via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 6 08:03:41 PST 2024
https://github.com/arcbbb updated https://github.com/llvm/llvm-project/pull/80697
>From 80712156edd2230970f2cc2ef97f813a667adcaf Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Mon, 5 Feb 2024 07:13:42 -0800
Subject: [PATCH 1/3] [RISCV][CostModel] Estimate cost of
llvm.vector.reduce.fmaximum/fminimum
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The ‘llvm.vector.reduce.fmaximum/fminimum.*’ intrinsics propagate NaNs:
if any element of the vector is a NaN, the result is NaN.
RVV handles this by repeatedly halving the vector until only one
element remains.
This patch sums the estimated cost of each halving step, where LMUL may vary
from step to step.
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 33 ++++++++++++
.../CostModel/RISCV/reduce-fmaximum.ll | 52 +++++++++----------
.../CostModel/RISCV/reduce-fminimum.ll | 52 +++++++++----------
3 files changed, 85 insertions(+), 52 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index d1db47a6061e4..691f328598bf7 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -969,6 +969,39 @@ RISCVTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
return getArithmeticReductionCost(Instruction::And, Ty, FMF, CostKind);
}
+ if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) {
+ if (LT.second.isScalableVector())
+ return InstructionCost::getInvalid();
+ // Following TargetLowering::expandVecReduce
+ // Example sequences to reduce v8f32 into v4f32
+ // vsetivli zero, 4, e32, m2, ta, ma
+ // vslidedown.vi v12, v10, 4
+ // vsetivli zero, 4, e32, m1, ta, ma
+ // vmfeq.vv v0, v12, v12
+ // vmfeq.vv v8, v10, v10
+ // vmerge.vvm v9, v12, v10, v0
+ // vmv.v.v v0, v8
+ // vmerge.vvm v8, v10, v12, v0
+ // vfmin.vv v9, v9, v8
+ MVT SubTy = LT.second;
+ unsigned ReduceOp =
+ IID == Intrinsic::maximum ? RISCV::VFMAX_VV : RISCV::VFMIN_VV;
+ unsigned Opcodes[] = {RISCV::VSLIDEDOWN_VI,
+ RISCV::VMFEQ_VV,
+ RISCV::VMFEQ_VV,
+ RISCV::VMERGE_VVM,
+ RISCV::VMV1R_V,
+ RISCV::VMERGE_VVM,
+ ReduceOp};
+ InstructionCost SplitCost = 0;
+ while (SubTy.getVectorNumElements() > 1) {
+ SubTy = SubTy.getHalfNumVectorElementsVT();
+ SplitCost += getRISCVInstructionCost(Opcodes, SubTy, CostKind);
+ }
+ return LT.first * SplitCost +
+ getRISCVInstructionCost({RISCV::VFMV_F_S}, SubTy, CostKind);
+ }
+
// IR Reduction is composed by two vmv and one rvv reduction instruction.
InstructionCost BaseCost = 2;
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
index 1618c3833a972..ea7c2c38a82a7 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
@@ -6,23 +6,23 @@
define float @reduce_fmaximum_f32(float %arg) {
; CHECK-LABEL: 'reduce_fmaximum_f32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
;
; SIZE-LABEL: 'reduce_fmaximum_f32'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef
;
%V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
@@ -44,21 +44,21 @@ declare float @llvm.vector.reduce.fmaximum.v128f32(<128 x float>)
define double @reduce_fmaximum_f64(double %arg) {
; CHECK-LABEL: 'reduce_fmaximum_f64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef
;
; SIZE-LABEL: 'reduce_fmaximum_f64'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double undef
;
%V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
index 35b18645b1f2d..d74906c77cf9e 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
@@ -6,23 +6,23 @@
define float @reduce_fmaximum_f32(float %arg) {
; CHECK-LABEL: 'reduce_fmaximum_f32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
;
; SIZE-LABEL: 'reduce_fmaximum_f32'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef
;
%V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
@@ -44,21 +44,21 @@ declare float @llvm.vector.reduce.fminimum.v128f32(<128 x float>)
define double @reduce_fmaximum_f64(double %arg) {
; CHECK-LABEL: 'reduce_fmaximum_f64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef
;
; SIZE-LABEL: 'reduce_fmaximum_f64'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double undef
;
%V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
>From 6315ac4cc5c4ba5673e0d39898d6dd320b72ff5a Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Mon, 5 Feb 2024 08:43:56 -0800
Subject: [PATCH 2/3] Model cost for VMFEQ_VV, VMV1R_V
---
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 2 ++
.../test/Analysis/CostModel/RISCV/reduce-fmaximum.ll | 12 ++++++------
.../test/Analysis/CostModel/RISCV/reduce-fminimum.ll | 12 ++++++------
3 files changed, 14 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 691f328598bf7..fbc9d66d7d718 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -88,8 +88,10 @@ RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
case RISCV::VMV_S_X:
case RISCV::VFMV_F_S:
case RISCV::VFMV_S_F:
+ case RISCV::VMFEQ_VV:
case RISCV::VMNAND_MM:
case RISCV::VCPOP_M:
+ case RISCV::VMV1R_V:
Cost += 1;
break;
default:
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
index ea7c2c38a82a7..4d14575563cd6 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
@@ -10,9 +10,9 @@ define float @reduce_fmaximum_f32(float %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
;
; SIZE-LABEL: 'reduce_fmaximum_f32'
@@ -47,9 +47,9 @@ define double @reduce_fmaximum_f64(double %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef
;
; SIZE-LABEL: 'reduce_fmaximum_f64'
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
index d74906c77cf9e..e66787d7dd759 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
@@ -10,9 +10,9 @@ define float @reduce_fmaximum_f32(float %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
;
; SIZE-LABEL: 'reduce_fmaximum_f32'
@@ -47,9 +47,9 @@ define double @reduce_fmaximum_f64(double %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef
;
; SIZE-LABEL: 'reduce_fmaximum_f64'
>From 2dce9928bd1e57dc69eec6be018fab227010de2b Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Tue, 6 Feb 2024 08:01:09 -0800
Subject: [PATCH 3/3] Fix: VMFEQ_VV scales with LMUL
---
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 1 -
.../test/Analysis/CostModel/RISCV/reduce-fmaximum.ll | 12 ++++++------
.../test/Analysis/CostModel/RISCV/reduce-fminimum.ll | 12 ++++++------
3 files changed, 12 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index fbc9d66d7d718..6a94b5319ed67 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -88,7 +88,6 @@ RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
case RISCV::VMV_S_X:
case RISCV::VFMV_F_S:
case RISCV::VFMV_S_F:
- case RISCV::VMFEQ_VV:
case RISCV::VMNAND_MM:
case RISCV::VCPOP_M:
case RISCV::VMV1R_V:
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
index 4d14575563cd6..64cc47f527f77 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
@@ -10,9 +10,9 @@ define float @reduce_fmaximum_f32(float %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
;
; SIZE-LABEL: 'reduce_fmaximum_f32'
@@ -47,9 +47,9 @@ define double @reduce_fmaximum_f64(double %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef
;
; SIZE-LABEL: 'reduce_fmaximum_f64'
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
index e66787d7dd759..dc3088bc558ea 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
@@ -10,9 +10,9 @@ define float @reduce_fmaximum_f32(float %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
;
; SIZE-LABEL: 'reduce_fmaximum_f32'
@@ -47,9 +47,9 @@ define double @reduce_fmaximum_f64(double %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef
;
; SIZE-LABEL: 'reduce_fmaximum_f64'
More information about the llvm-commits
mailing list