[llvm] [RISCV] Fix the cost of `llvm.vector.reduce.and` (PR #119160)

Thu Jan 2 18:20:12 PST 2025

https://github.com/sunshaoce updated https://github.com/llvm/llvm-project/pull/119160

>From f856f15393ac6f687b5b45012e8d02451fe39baa Mon Sep 17 00:00:00 2001
From: Shao-Ce SUN <sunshaoce at outlook.com>
Date: Tue, 10 Dec 2024 09:59:05 +0800
Subject: [PATCH] [RISCV] Fix the cost of llvm.vector.reduce.and

---
 .../Target/RISCV/RISCVTargetTransformInfo.cpp | 19 +++++++++++++++++--
 .../Analysis/CostModel/RISCV/reduce-and.ll    | 16 ++++++++--------
 .../Analysis/CostModel/RISCV/reduce-max.ll    |  4 ++--
 .../Analysis/CostModel/RISCV/reduce-min.ll    |  4 ++--
 4 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 909a64e9742554..dc423501104c44 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1536,6 +1536,14 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
   std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
   Type *ElementTy = Ty->getElementType();
   if (ElementTy->isIntegerTy(1)) {
+    // Example sequences:
+    //   vfirst.m a0, v0
+    //   seqz a0, a0
+    if (LT.second == MVT::v1i1)
+      return getRISCVInstructionCost(RISCV::VFIRST_M, LT.second, CostKind) +
+             getCmpSelInstrCost(Instruction::ICmp, ElementTy, ElementTy,
+                                CmpInst::ICMP_EQ, CostKind);
+
     if (ISD == ISD::AND) {
       // Example sequences:
       //   vsetvli a0, zero, e8, mf8, ta, ma
@@ -1543,8 +1551,15 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
       //   vmnot.m v8, v0
       //   vcpop.m a0, v8
       //   seqz a0, a0
-      return LT.first * getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second,
-                                                CostKind) +
+
+      // Fixed VT:    In v512i1 and larger vector elements,
+      // Scalable VT: In v128i1 and larger vector elements,
+      // the VMAND_MM instructions have started to be added.
+      return ((LT.first >= 2)
+                  ? LT.first - (LT.second.isScalableVector() ? 1 : 2)
+                  : 0) *
+                 getRISCVInstructionCost(RISCV::VMAND_MM, LT.second, CostKind) +
+             getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second, CostKind) +
              getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
              getCmpSelInstrCost(Instruction::ICmp, ElementTy, ElementTy,
                                 CmpInst::ICMP_EQ, CostKind);
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
index 463232f082f40b..7b626c426fc226 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
@@ -6,7 +6,7 @@
 
 define i32 @reduce_i1(i32 %arg) {
 ; CHECK-LABEL: 'reduce_i1'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
@@ -14,9 +14,9 @@ define i32 @reduce_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call i1 @llvm.vector.reduce.and.nxv1i1(<vscale x 1 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call i1 @llvm.vector.reduce.and.nxv2i1(<vscale x 2 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call i1 @llvm.vector.reduce.and.nxv4i1(<vscale x 4 x i1> undef)
@@ -31,7 +31,7 @@ define i32 @reduce_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i1'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
@@ -39,9 +39,9 @@ define i32 @reduce_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call i1 @llvm.vector.reduce.and.nxv1i1(<vscale x 1 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call i1 @llvm.vector.reduce.and.nxv2i1(<vscale x 2 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call i1 @llvm.vector.reduce.and.nxv4i1(<vscale x 4 x i1> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
index f11e9f2b5ae837..5c9303af31747e 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
@@ -176,7 +176,7 @@ define i32 @reduce_umax_i64(i32 %arg) {
 
 define i32 @reduce_smin_i1(i32 %arg) {
 ; CHECK-LABEL: 'reduce_smin_i1'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.smax.v2i1(<2 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.smax.v4i1(<4 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.smax.v8i1(<8 x i1> undef)
@@ -187,7 +187,7 @@ define i32 @reduce_smin_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smin_i1'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.smax.v2i1(<2 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.smax.v4i1(<4 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.smax.v8i1(<8 x i1> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
index 457fdbe46f73b5..9875d3e5858115 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
@@ -6,7 +6,7 @@
 
 define i32 @reduce_umin_i1(i32 %arg) {
 ; CHECK-LABEL: 'reduce_umin_i1'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.umin.v2i1(<2 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.umin.v4i1(<4 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.umin.v8i1(<8 x i1> undef)
@@ -17,7 +17,7 @@ define i32 @reduce_umin_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umin_i1'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.umin.v2i1(<2 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.umin.v4i1(<4 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.umin.v8i1(<8 x i1> undef)