[llvm] [RISCV][CostModel] Estimate cost of llvm.vector.reduce.fmaximum/fminimum (PR #80697)
Shih-Po Hung via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 21 07:44:45 PDT 2024
https://github.com/arcbbb updated https://github.com/llvm/llvm-project/pull/80697
>From 9eb0d55a4c485554b88f8473e141b497b36e4494 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Mon, 5 Feb 2024 07:13:42 -0800
Subject: [PATCH 1/3] [RISCV][CostModel] Estimate cost of
llvm.vector.reduce.fmaximum/fminimum
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The 'llvm.vector.reduce.fmaximum/fminimum.*' intrinsics propagate NaNs:
if any element of the vector is a NaN, the result is NaN.
Following #79402, this patch adds the cost of the NaN check (vmfne + vcpop).
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 47 ++++++++++
.../CostModel/RISCV/reduce-fmaximum.ll | 91 +++++++++++++------
.../CostModel/RISCV/reduce-fminimum.ll | 52 +++++------
3 files changed, 138 insertions(+), 52 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 8f46fdc2f7ca93..39ef29c6af30c1 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1001,6 +1001,53 @@ RISCVTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
return getArithmeticReductionCost(Instruction::And, Ty, FMF, CostKind);
}
+ if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) {
+ SmallVector<unsigned, 5> SplitOps;
+ SmallVector<unsigned, 3> Opcodes;
+ InstructionCost ExtraCost = 0;
+ switch (IID) {
+ case Intrinsic::maximum:
+ if (FMF.noNaNs()) {
+ SplitOps = {RISCV::VFMAX_VV};
+ Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
+ } else {
+ SplitOps = {RISCV::VMFEQ_VV, RISCV::VMERGE_VVM, RISCV::VMFEQ_VV,
+ RISCV::VMERGE_VVM, RISCV::VFMAX_VV};
+ Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMAX_VS,
+ RISCV::VFMV_F_S};
+ // Cost of Canonical Nan
+ // lui a0, 523264
+ // fmv.w.x fa0, a0
+ ExtraCost = 2;
+ }
+ break;
+
+ case Intrinsic::minimum:
+ if (FMF.noNaNs()) {
+ SplitOps = {RISCV::VFMIN_VV};
+ Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
+ } else {
+ SplitOps = {RISCV::VMFEQ_VV, RISCV::VMERGE_VVM, RISCV::VMFEQ_VV,
+ RISCV::VMERGE_VVM, RISCV::VFMIN_VV};
+ Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMIN_VS,
+ RISCV::VFMV_F_S};
+ // Cost of Canonical Nan
+ // lui a0, 523264
+ // fmv.w.x fa0, a0
+ ExtraCost = 2;
+ }
+ break;
+ }
+ // Add a cost for data larger than LMUL8
+ InstructionCost SplitCost =
+ (LT.first > 1)
+ ? (LT.first - 1) *
+ getRISCVInstructionCost(SplitOps, LT.second, CostKind)
+ : 0;
+ return ExtraCost + SplitCost +
+ getRISCVInstructionCost(Opcodes, LT.second, CostKind);
+ }
+
// IR Reduction is composed by two vmv and one rvv reduction instruction.
InstructionCost BaseCost = 2;
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
index 1618c3833a9722..4fd57e06d07b16 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
@@ -6,23 +6,37 @@
define float @reduce_fmaximum_f32(float %arg) {
; CHECK-LABEL: 'reduce_fmaximum_f32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call fast float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = call fast float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = call fast float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %4 = call fast float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %5 = call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
;
; SIZE-LABEL: 'reduce_fmaximum_f32'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call fast float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call fast float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call fast float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call fast float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef
;
%V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
@@ -32,6 +46,13 @@ define float @reduce_fmaximum_f32(float %arg) {
%V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
%V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
%V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+call fast float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
+call fast float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
+call fast float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
+call fast float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
+call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
+call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
+call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
ret float undef
}
declare float @llvm.vector.reduce.fmaximum.v2f32(<2 x float>)
@@ -44,21 +65,33 @@ declare float @llvm.vector.reduce.fmaximum.v128f32(<128 x float>)
define double @reduce_fmaximum_f64(double %arg) {
; CHECK-LABEL: 'reduce_fmaximum_f64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call fast double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = call fast double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = call fast double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %4 = call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %5 = call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef
;
; SIZE-LABEL: 'reduce_fmaximum_f64'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call fast double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call fast double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call fast double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double undef
;
%V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
@@ -67,6 +100,12 @@ define double @reduce_fmaximum_f64(double %arg) {
%V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
%V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
%V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+call fast double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
+call fast double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
+call fast double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
+call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
+call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
+call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
ret double undef
}
declare double @llvm.vector.reduce.fmaximum.v2f64(<2 x double>)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
index 35b18645b1f2de..269f2dcd7caacd 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
@@ -6,23 +6,23 @@
define float @reduce_fmaximum_f32(float %arg) {
; CHECK-LABEL: 'reduce_fmaximum_f32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
;
; SIZE-LABEL: 'reduce_fmaximum_f32'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef
;
%V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
@@ -44,21 +44,21 @@ declare float @llvm.vector.reduce.fminimum.v128f32(<128 x float>)
define double @reduce_fmaximum_f64(double %arg) {
; CHECK-LABEL: 'reduce_fmaximum_f64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef
;
; SIZE-LABEL: 'reduce_fmaximum_f64'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double undef
;
%V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
>From 00bd3c20f73e83283580d3c5824a70aa01d2cd1b Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Wed, 20 Mar 2024 10:37:30 -0700
Subject: [PATCH 2/3] Address comments
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 36 +++++++++---------
.../CostModel/RISCV/reduce-fmaximum.ll | 38 +++++++++----------
.../CostModel/RISCV/reduce-fminimum.ll | 30 +++++++--------
3 files changed, 51 insertions(+), 53 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 39ef29c6af30c1..530a6165d5b5d9 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1002,50 +1002,48 @@ RISCVTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
}
if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) {
- SmallVector<unsigned, 5> SplitOps;
SmallVector<unsigned, 3> Opcodes;
InstructionCost ExtraCost = 0;
switch (IID) {
case Intrinsic::maximum:
if (FMF.noNaNs()) {
- SplitOps = {RISCV::VFMAX_VV};
Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
} else {
- SplitOps = {RISCV::VMFEQ_VV, RISCV::VMERGE_VVM, RISCV::VMFEQ_VV,
- RISCV::VMERGE_VVM, RISCV::VFMAX_VV};
Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMAX_VS,
RISCV::VFMV_F_S};
- // Cost of Canonical Nan
+ // Cost of Canonical Nan + branch
// lui a0, 523264
// fmv.w.x fa0, a0
- ExtraCost = 2;
+ Type *DstTy = Ty->getScalarType();
+ const unsigned EltTyBits = DL.getTypeSizeInBits(DstTy);
+ Type *SrcTy = IntegerType::getIntNTy(DstTy->getContext(), EltTyBits);
+ ExtraCost = 1 +
+ getCastInstrCost(Instruction::UIToFP, DstTy, SrcTy,
+ TTI::CastContextHint::None, CostKind) +
+ getCFInstrCost(Instruction::Br, CostKind);
}
break;
case Intrinsic::minimum:
if (FMF.noNaNs()) {
- SplitOps = {RISCV::VFMIN_VV};
Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
} else {
- SplitOps = {RISCV::VMFEQ_VV, RISCV::VMERGE_VVM, RISCV::VMFEQ_VV,
- RISCV::VMERGE_VVM, RISCV::VFMIN_VV};
Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMIN_VS,
RISCV::VFMV_F_S};
- // Cost of Canonical Nan
+ // Cost of Canonical Nan + branch
// lui a0, 523264
// fmv.w.x fa0, a0
- ExtraCost = 2;
+ Type *DstTy = Ty->getScalarType();
+ const unsigned EltTyBits = DL.getTypeSizeInBits(DstTy);
+ Type *SrcTy = IntegerType::getIntNTy(DstTy->getContext(), EltTyBits);
+ ExtraCost = 1 +
+ getCastInstrCost(Instruction::UIToFP, DstTy, SrcTy,
+ TTI::CastContextHint::None, CostKind) +
+ getCFInstrCost(Instruction::Br, CostKind);
}
break;
}
- // Add a cost for data larger than LMUL8
- InstructionCost SplitCost =
- (LT.first > 1)
- ? (LT.first - 1) *
- getRISCVInstructionCost(SplitOps, LT.second, CostKind)
- : 0;
- return ExtraCost + SplitCost +
- getRISCVInstructionCost(Opcodes, LT.second, CostKind);
+ return ExtraCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
}
// IR Reduction is composed by two vmv and one rvv reduction instruction.
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
index 4fd57e06d07b16..f91f13b2d9ec61 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
@@ -12,31 +12,31 @@ define float @reduce_fmaximum_f32(float %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call fast float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = call fast float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = call fast float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %4 = call fast float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %5 = call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
;
; SIZE-LABEL: 'reduce_fmaximum_f32'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call fast float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call fast float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call fast float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call fast float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef
;
%V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
@@ -70,28 +70,28 @@ define double @reduce_fmaximum_f64(double %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call fast double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = call fast double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = call fast double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %4 = call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %5 = call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef
;
; SIZE-LABEL: 'reduce_fmaximum_f64'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call fast double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call fast double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call fast double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double undef
;
%V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
index 269f2dcd7caacd..86b84025ad5418 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
@@ -12,17 +12,17 @@ define float @reduce_fmaximum_f32(float %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
;
; SIZE-LABEL: 'reduce_fmaximum_f32'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef
;
%V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
@@ -49,16 +49,16 @@ define double @reduce_fmaximum_f64(double %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef
;
; SIZE-LABEL: 'reduce_fmaximum_f64'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double undef
;
%V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
>From 145c3bb383481081dcda5b7e11fb9d9668c9d34a Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Thu, 21 Mar 2024 07:32:22 -0700
Subject: [PATCH 3/3] Use getScalarSizeInBits
---
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 530a6165d5b5d9..f75b3d3caa62f2 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1015,7 +1015,7 @@ RISCVTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
// lui a0, 523264
// fmv.w.x fa0, a0
Type *DstTy = Ty->getScalarType();
- const unsigned EltTyBits = DL.getTypeSizeInBits(DstTy);
+ const unsigned EltTyBits = DstTy->getScalarSizeInBits();
Type *SrcTy = IntegerType::getIntNTy(DstTy->getContext(), EltTyBits);
ExtraCost = 1 +
getCastInstrCost(Instruction::UIToFP, DstTy, SrcTy,
More information about the llvm-commits mailing list