[llvm] [RISCV][CostModel] Estimate cost of llvm.vector.reduce.fmaximum/fminimum (PR #80697)
Shih-Po Hung via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 26 19:59:21 PST 2024
https://github.com/arcbbb updated https://github.com/llvm/llvm-project/pull/80697
>From 80712156edd2230970f2cc2ef97f813a667adcaf Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Mon, 5 Feb 2024 07:13:42 -0800
Subject: [PATCH 1/6] [RISCV][CostModel] Estimate cost of
llvm.vector.reduce.fmaximum/fminimum
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The ‘llvm.vector.reduce.fmaximum/fminimum.*’ intrinsics propagate NaNs.
and if any element of the vector is a NaN, the result is NaN.
RVV handles this by continuously dividing the vector until only one
remains.
This patch estimates the cost in each division, where LMUL may vary.
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 33 ++++++++++++
.../CostModel/RISCV/reduce-fmaximum.ll | 52 +++++++++----------
.../CostModel/RISCV/reduce-fminimum.ll | 52 +++++++++----------
3 files changed, 85 insertions(+), 52 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index d1db47a6061e4e..691f328598bf79 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -969,6 +969,39 @@ RISCVTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
return getArithmeticReductionCost(Instruction::And, Ty, FMF, CostKind);
}
+ if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) {
+ if (LT.second.isScalableVector())
+ return InstructionCost::getInvalid();
+ // Following TargetLowering::expandVecReduce
+ // Example sequences to reduce v8f32 into v4f32
+ // vsetivli zero, 4, e32, m2, ta, ma
+ // vslidedown.vi v12, v10, 4
+ // vsetivli zero, 4, e32, m1, ta, ma
+ // vmfeq.vv v0, v12, v12
+ // vmfeq.vv v8, v10, v10
+ // vmerge.vvm v9, v12, v10, v0
+ // vmv.v.v v0, v8
+ // vmerge.vvm v8, v10, v12, v0
+ // vfmin.vv v9, v9, v8
+ MVT SubTy = LT.second;
+ unsigned ReduceOp =
+ IID == Intrinsic::maximum ? RISCV::VFMAX_VV : RISCV::VFMIN_VV;
+ unsigned Opcodes[] = {RISCV::VSLIDEDOWN_VI,
+ RISCV::VMFEQ_VV,
+ RISCV::VMFEQ_VV,
+ RISCV::VMERGE_VVM,
+ RISCV::VMV1R_V,
+ RISCV::VMERGE_VVM,
+ ReduceOp};
+ InstructionCost SplitCost = 0;
+ while (SubTy.getVectorNumElements() > 1) {
+ SubTy = SubTy.getHalfNumVectorElementsVT();
+ SplitCost += getRISCVInstructionCost(Opcodes, SubTy, CostKind);
+ }
+ return LT.first * SplitCost +
+ getRISCVInstructionCost({RISCV::VFMV_F_S}, SubTy, CostKind);
+ }
+
// IR Reduction is composed by two vmv and one rvv reduction instruction.
InstructionCost BaseCost = 2;
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
index 1618c3833a9722..ea7c2c38a82a73 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
@@ -6,23 +6,23 @@
define float @reduce_fmaximum_f32(float %arg) {
; CHECK-LABEL: 'reduce_fmaximum_f32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
;
; SIZE-LABEL: 'reduce_fmaximum_f32'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef
;
%V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
@@ -44,21 +44,21 @@ declare float @llvm.vector.reduce.fmaximum.v128f32(<128 x float>)
define double @reduce_fmaximum_f64(double %arg) {
; CHECK-LABEL: 'reduce_fmaximum_f64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef
;
; SIZE-LABEL: 'reduce_fmaximum_f64'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double undef
;
%V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
index 35b18645b1f2de..d74906c77cf9e9 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
@@ -6,23 +6,23 @@
define float @reduce_fmaximum_f32(float %arg) {
; CHECK-LABEL: 'reduce_fmaximum_f32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
;
; SIZE-LABEL: 'reduce_fmaximum_f32'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef
;
%V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
@@ -44,21 +44,21 @@ declare float @llvm.vector.reduce.fminimum.v128f32(<128 x float>)
define double @reduce_fmaximum_f64(double %arg) {
; CHECK-LABEL: 'reduce_fmaximum_f64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef
;
; SIZE-LABEL: 'reduce_fmaximum_f64'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double undef
;
%V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
>From 6315ac4cc5c4ba5673e0d39898d6dd320b72ff5a Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Mon, 5 Feb 2024 08:43:56 -0800
Subject: [PATCH 2/6] Model cost for VMFEQ_VV, VMV1R_V
---
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 2 ++
.../test/Analysis/CostModel/RISCV/reduce-fmaximum.ll | 12 ++++++------
.../test/Analysis/CostModel/RISCV/reduce-fminimum.ll | 12 ++++++------
3 files changed, 14 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 691f328598bf79..fbc9d66d7d718e 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -88,8 +88,10 @@ RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
case RISCV::VMV_S_X:
case RISCV::VFMV_F_S:
case RISCV::VFMV_S_F:
+ case RISCV::VMFEQ_VV:
case RISCV::VMNAND_MM:
case RISCV::VCPOP_M:
+ case RISCV::VMV1R_V:
Cost += 1;
break;
default:
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
index ea7c2c38a82a73..4d14575563cd65 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
@@ -10,9 +10,9 @@ define float @reduce_fmaximum_f32(float %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
;
; SIZE-LABEL: 'reduce_fmaximum_f32'
@@ -47,9 +47,9 @@ define double @reduce_fmaximum_f64(double %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef
;
; SIZE-LABEL: 'reduce_fmaximum_f64'
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
index d74906c77cf9e9..e66787d7dd7592 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
@@ -10,9 +10,9 @@ define float @reduce_fmaximum_f32(float %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
;
; SIZE-LABEL: 'reduce_fmaximum_f32'
@@ -47,9 +47,9 @@ define double @reduce_fmaximum_f64(double %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef
;
; SIZE-LABEL: 'reduce_fmaximum_f64'
>From 2dce9928bd1e57dc69eec6be018fab227010de2b Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Tue, 6 Feb 2024 08:01:09 -0800
Subject: [PATCH 3/6] Fix: VMFEQ_VV scales with LMUL
---
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 1 -
.../test/Analysis/CostModel/RISCV/reduce-fmaximum.ll | 12 ++++++------
.../test/Analysis/CostModel/RISCV/reduce-fminimum.ll | 12 ++++++------
3 files changed, 12 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index fbc9d66d7d718e..6a94b5319ed67f 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -88,7 +88,6 @@ RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
case RISCV::VMV_S_X:
case RISCV::VFMV_F_S:
case RISCV::VFMV_S_F:
- case RISCV::VMFEQ_VV:
case RISCV::VMNAND_MM:
case RISCV::VCPOP_M:
case RISCV::VMV1R_V:
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
index 4d14575563cd65..64cc47f527f77a 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
@@ -10,9 +10,9 @@ define float @reduce_fmaximum_f32(float %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
;
; SIZE-LABEL: 'reduce_fmaximum_f32'
@@ -47,9 +47,9 @@ define double @reduce_fmaximum_f64(double %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef
;
; SIZE-LABEL: 'reduce_fmaximum_f64'
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
index e66787d7dd7592..dc3088bc558ea2 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
@@ -10,9 +10,9 @@ define float @reduce_fmaximum_f32(float %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
;
; SIZE-LABEL: 'reduce_fmaximum_f32'
@@ -47,9 +47,9 @@ define double @reduce_fmaximum_f64(double %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef
;
; SIZE-LABEL: 'reduce_fmaximum_f64'
>From 2a633dd3108ddb78d3b01264f84cda891963bc6f Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Thu, 8 Feb 2024 08:08:23 -0800
Subject: [PATCH 4/6] Make VMV1R_V cost 0
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 5 +++-
.../CostModel/RISCV/reduce-fmaximum.ll | 26 +++++++++----------
.../CostModel/RISCV/reduce-fminimum.ll | 26 +++++++++----------
3 files changed, 30 insertions(+), 27 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 6a94b5319ed67f..fc3bb6b63d92e7 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -90,9 +90,12 @@ RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
case RISCV::VFMV_S_F:
case RISCV::VMNAND_MM:
case RISCV::VCPOP_M:
- case RISCV::VMV1R_V:
Cost += 1;
break;
+ case RISCV::VMV1R_V:
+ // On some micro-architectures it is register rename.
+ Cost += 0;
+ break;
default:
Cost += LMULCost;
}
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
index 64cc47f527f77a..62a2a27aa78afa 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
@@ -6,13 +6,13 @@
define float @reduce_fmaximum_f32(float %arg) {
; CHECK-LABEL: 'reduce_fmaximum_f32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 121 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
;
; SIZE-LABEL: 'reduce_fmaximum_f32'
@@ -44,12 +44,12 @@ declare float @llvm.vector.reduce.fmaximum.v128f32(<128 x float>)
define double @reduce_fmaximum_f64(double %arg) {
; CHECK-LABEL: 'reduce_fmaximum_f64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef
;
; SIZE-LABEL: 'reduce_fmaximum_f64'
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
index dc3088bc558ea2..e1021a4ef882eb 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
@@ -6,13 +6,13 @@
define float @reduce_fmaximum_f32(float %arg) {
; CHECK-LABEL: 'reduce_fmaximum_f32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 121 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
;
; SIZE-LABEL: 'reduce_fmaximum_f32'
@@ -44,12 +44,12 @@ declare float @llvm.vector.reduce.fminimum.v128f32(<128 x float>)
define double @reduce_fmaximum_f64(double %arg) {
; CHECK-LABEL: 'reduce_fmaximum_f64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef
;
; SIZE-LABEL: 'reduce_fmaximum_f64'
>From b58ac6188facc0267d0d9e2b46cf7978c147cc14 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Mon, 26 Feb 2024 17:44:24 -0800
Subject: [PATCH 5/6] Remove VMV1R_V since it can be avoided by scheduling
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 10 ++-----
.../CostModel/RISCV/reduce-fmaximum.ll | 26 +++++++++----------
.../CostModel/RISCV/reduce-fminimum.ll | 26 +++++++++----------
3 files changed, 28 insertions(+), 34 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index fc3bb6b63d92e7..4dea4ad36e424b 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -92,10 +92,6 @@ RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
case RISCV::VCPOP_M:
Cost += 1;
break;
- case RISCV::VMV1R_V:
- // On some micro-architectures it is register rename.
- Cost += 0;
- break;
default:
Cost += LMULCost;
}
@@ -982,19 +978,17 @@ RISCVTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
// vslidedown.vi v12, v10, 4
// vsetivli zero, 4, e32, m1, ta, ma
// vmfeq.vv v0, v12, v12
- // vmfeq.vv v8, v10, v10
// vmerge.vvm v9, v12, v10, v0
- // vmv.v.v v0, v8
+ // vmfeq.vv v0, v10, v10
// vmerge.vvm v8, v10, v12, v0
// vfmin.vv v9, v9, v8
MVT SubTy = LT.second;
unsigned ReduceOp =
IID == Intrinsic::maximum ? RISCV::VFMAX_VV : RISCV::VFMIN_VV;
unsigned Opcodes[] = {RISCV::VSLIDEDOWN_VI,
- RISCV::VMFEQ_VV,
RISCV::VMFEQ_VV,
RISCV::VMERGE_VVM,
- RISCV::VMV1R_V,
+ RISCV::VMFEQ_VV,
RISCV::VMERGE_VVM,
ReduceOp};
InstructionCost SplitCost = 0;
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
index 62a2a27aa78afa..cc3e855147d550 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
@@ -16,13 +16,13 @@ define float @reduce_fmaximum_f32(float %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
;
; SIZE-LABEL: 'reduce_fmaximum_f32'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32 = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V64 = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V128 = call float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef
;
%V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
@@ -53,12 +53,12 @@ define double @reduce_fmaximum_f64(double %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef
;
; SIZE-LABEL: 'reduce_fmaximum_f64'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32 = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double undef
;
%V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
index e1021a4ef882eb..d2a534f2356746 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
@@ -16,13 +16,13 @@ define float @reduce_fmaximum_f32(float %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
;
; SIZE-LABEL: 'reduce_fmaximum_f32'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef
;
%V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
@@ -53,12 +53,12 @@ define double @reduce_fmaximum_f64(double %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef
;
; SIZE-LABEL: 'reduce_fmaximum_f64'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double undef
;
%V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
>From c821d81eaa9f1d4b1c3f340167796bbcf1a375f5 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Mon, 26 Feb 2024 19:59:00 -0800
Subject: [PATCH 6/6] clang-formatted
---
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 4dea4ad36e424b..255c275f4ab185 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -985,12 +985,9 @@ RISCVTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
MVT SubTy = LT.second;
unsigned ReduceOp =
IID == Intrinsic::maximum ? RISCV::VFMAX_VV : RISCV::VFMIN_VV;
- unsigned Opcodes[] = {RISCV::VSLIDEDOWN_VI,
- RISCV::VMFEQ_VV,
- RISCV::VMERGE_VVM,
- RISCV::VMFEQ_VV,
- RISCV::VMERGE_VVM,
- ReduceOp};
+ unsigned Opcodes[] = {RISCV::VSLIDEDOWN_VI, RISCV::VMFEQ_VV,
+ RISCV::VMERGE_VVM, RISCV::VMFEQ_VV,
+ RISCV::VMERGE_VVM, ReduceOp};
InstructionCost SplitCost = 0;
while (SubTy.getVectorNumElements() > 1) {
SubTy = SubTy.getHalfNumVectorElementsVT();
More information about the llvm-commits
mailing list