[llvm] [RISCV] Fix the cost of `llvm.vector.reduce.and` (PR #119160)
Shao-Ce SUN via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 6 22:59:28 PST 2025
https://github.com/sunshaoce updated https://github.com/llvm/llvm-project/pull/119160
>From 1d625243652c7f2919d93f9413176355e8adc05b Mon Sep 17 00:00:00 2001
From: Shao-Ce SUN <sunshaoce at outlook.com>
Date: Tue, 10 Dec 2024 09:59:05 +0800
Subject: [PATCH 1/3] [RISCV] Fix the cost of llvm.vector.reduce.and
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 19 +++++++++++++++++--
.../Analysis/CostModel/RISCV/reduce-and.ll | 16 ++++++++--------
.../Analysis/CostModel/RISCV/reduce-max.ll | 4 ++--
.../Analysis/CostModel/RISCV/reduce-min.ll | 4 ++--
4 files changed, 29 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 850d6244affa503..8c6f5c13a101a5e 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1536,6 +1536,14 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
Type *ElementTy = Ty->getElementType();
if (ElementTy->isIntegerTy(1)) {
+ // Example sequences:
+ // vfirst.m a0, v0
+ // seqz a0, a0
+ if (LT.second == MVT::v1i1)
+ return getRISCVInstructionCost(RISCV::VFIRST_M, LT.second, CostKind) +
+ getCmpSelInstrCost(Instruction::ICmp, ElementTy, ElementTy,
+ CmpInst::ICMP_EQ, CostKind);
+
if (ISD == ISD::AND) {
// Example sequences:
// vsetvli a0, zero, e8, mf8, ta, ma
@@ -1543,8 +1551,15 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
// vmnot.m v8, v0
// vcpop.m a0, v8
// seqz a0, a0
- return LT.first * getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second,
- CostKind) +
+
+ // Fixed VT: In v512i1 and larger vector elements,
+ // Scalable VT: In v128i1 and larger vector elements,
+ // the VMAND_MM instructions have started to be added.
+ return ((LT.first >= 2)
+ ? LT.first - (LT.second.isScalableVector() ? 1 : 2)
+ : 0) *
+ getRISCVInstructionCost(RISCV::VMAND_MM, LT.second, CostKind) +
+ getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second, CostKind) +
getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
getCmpSelInstrCost(Instruction::ICmp, ElementTy, ElementTy,
CmpInst::ICMP_EQ, CostKind);
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
index 463232f082f40be..7b626c426fc226d 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
@@ -6,7 +6,7 @@
define i32 @reduce_i1(i32 %arg) {
; CHECK-LABEL: 'reduce_i1'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
@@ -14,9 +14,9 @@ define i32 @reduce_i1(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call i1 @llvm.vector.reduce.and.nxv1i1(<vscale x 1 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call i1 @llvm.vector.reduce.and.nxv2i1(<vscale x 2 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call i1 @llvm.vector.reduce.and.nxv4i1(<vscale x 4 x i1> undef)
@@ -31,7 +31,7 @@ define i32 @reduce_i1(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i1'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
@@ -39,9 +39,9 @@ define i32 @reduce_i1(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call i1 @llvm.vector.reduce.and.nxv1i1(<vscale x 1 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call i1 @llvm.vector.reduce.and.nxv2i1(<vscale x 2 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call i1 @llvm.vector.reduce.and.nxv4i1(<vscale x 4 x i1> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
index f11e9f2b5ae8374..5c9303af31747e2 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
@@ -176,7 +176,7 @@ define i32 @reduce_umax_i64(i32 %arg) {
define i32 @reduce_smin_i1(i32 %arg) {
; CHECK-LABEL: 'reduce_smin_i1'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.smax.v2i1(<2 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.smax.v4i1(<4 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.smax.v8i1(<8 x i1> undef)
@@ -187,7 +187,7 @@ define i32 @reduce_smin_i1(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_smin_i1'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.smax.v2i1(<2 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.smax.v4i1(<4 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.smax.v8i1(<8 x i1> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
index 457fdbe46f73b5d..9875d3e58581154 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
@@ -6,7 +6,7 @@
define i32 @reduce_umin_i1(i32 %arg) {
; CHECK-LABEL: 'reduce_umin_i1'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.umin.v2i1(<2 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.umin.v4i1(<4 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.umin.v8i1(<8 x i1> undef)
@@ -17,7 +17,7 @@ define i32 @reduce_umin_i1(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_umin_i1'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.umin.v2i1(<2 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.umin.v4i1(<4 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.umin.v8i1(<8 x i1> undef)
>From 0a55282bdaefc987bee0b136ef7dbca4e40765fa Mon Sep 17 00:00:00 2001
From: Shao-Ce SUN <sunshaoce at outlook.com>
Date: Mon, 6 Jan 2025 16:25:12 +0800
Subject: [PATCH 2/3] fixup! Addressed comments.
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 19 +-
.../Analysis/CostModel/RISCV/reduce-and-i1.ll | 239 ++++++++
llvm/test/CodeGen/RISCV/rvv/reduce-and-i1.ll | 548 ++++++++++++++++++
3 files changed, 799 insertions(+), 7 deletions(-)
create mode 100644 llvm/test/Analysis/CostModel/RISCV/reduce-and-i1.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/reduce-and-i1.ll
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 8c6f5c13a101a5e..cf13f5423de88a2 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1546,18 +1546,23 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
if (ISD == ISD::AND) {
// Example sequences:
- // vsetvli a0, zero, e8, mf8, ta, ma
// vmand.mm v8, v9, v8 ; needed every time type is split
// vmnot.m v8, v0
// vcpop.m a0, v8
// seqz a0, a0
- // Fixed VT: In v512i1 and larger vector elements,
- // Scalable VT: In v128i1 and larger vector elements,
- // the VMAND_MM instructions have started to be added.
- return ((LT.first >= 2)
- ? LT.first - (LT.second.isScalableVector() ? 1 : 2)
- : 0) *
+ // Scalable VT: for nxv128i1 and larger vector types,
+ // Fixed VT: when getFixedSizeInBits() >= (4 * getRealMinVLen()),
+ // extra VMAND_MM instructions are needed to combine the split parts.
+ InstructionCost NumOfVMAND = 0;
+ if (LT.second.isScalableVector()) {
+ NumOfVMAND = (LT.first >= 2) ? (LT.first - 1) : 0;
+ } else {
+ bool IsOverflow =
+ LT.second.getFixedSizeInBits() == ST->getRealMinVLen();
+ NumOfVMAND = (IsOverflow && LT.first > 2) ? (LT.first - 2) : 0;
+ }
+ return NumOfVMAND *
getRISCVInstructionCost(RISCV::VMAND_MM, LT.second, CostKind) +
getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second, CostKind) +
getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-and-i1.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-and-i1.ll
new file mode 100644
index 000000000000000..99a23743674bf94
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-and-i1.ll
@@ -0,0 +1,239 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v,+zvl128b -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output \
+; RUN: | FileCheck %s --check-prefixes=THROUGHPUT,THROUGHPUT-VL128B
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zvl128b -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output \
+; RUN: | FileCheck %s --check-prefixes=THROUGHPUT,THROUGHPUT-VL128B
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v,+zvl256b -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output \
+; RUN: | FileCheck %s --check-prefixes=THROUGHPUT,THROUGHPUT-VL256B
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zvl256b -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output \
+; RUN: | FileCheck %s --check-prefixes=THROUGHPUT,THROUGHPUT-VL256B
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v,+zvl512b -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output \
+; RUN: | FileCheck %s --check-prefixes=THROUGHPUT,THROUGHPUT-VL512B
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zvl512b -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output \
+; RUN: | FileCheck %s --check-prefixes=THROUGHPUT,THROUGHPUT-VL512B
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v,+zvl1024b -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output \
+; RUN: | FileCheck %s --check-prefixes=THROUGHPUT,THROUGHPUT-VL1024B
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zvl1024b -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output \
+; RUN: | FileCheck %s --check-prefixes=THROUGHPUT,THROUGHPUT-VL1024B
+
+define zeroext i1 @vreduce_and_v1i1(<1 x i1> %v) {
+; THROUGHPUT-LABEL: 'vreduce_and_v1i1'
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %v)
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
+;
+ %red = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_v2i1(<2 x i1> %v) {
+; THROUGHPUT-LABEL: 'vreduce_and_v2i1'
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %v)
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
+;
+ %red = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_v4i1(<4 x i1> %v) {
+; THROUGHPUT-LABEL: 'vreduce_and_v4i1'
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %v)
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
+;
+ %red = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_v8i1(<8 x i1> %v) {
+; THROUGHPUT-LABEL: 'vreduce_and_v8i1'
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %v)
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
+;
+ %red = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_v16i1(<16 x i1> %v) {
+; THROUGHPUT-LABEL: 'vreduce_and_v16i1'
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %v)
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
+;
+ %red = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_v32i1(<32 x i1> %v) {
+; THROUGHPUT-LABEL: 'vreduce_and_v32i1'
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %v)
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
+;
+ %red = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_v64i1(<64 x i1> %v) {
+; THROUGHPUT-LABEL: 'vreduce_and_v64i1'
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %v)
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
+;
+ %red = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_v128i1(<128 x i1> %v) {
+; THROUGHPUT-LABEL: 'vreduce_and_v128i1'
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> %v)
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
+;
+ %red = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_v256i1(<256 x i1> %v) {
+; THROUGHPUT-LABEL: 'vreduce_and_v256i1'
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> %v)
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
+;
+ %red = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_v512i1(<512 x i1> %v) {
+; THROUGHPUT-VL128B-LABEL: 'vreduce_and_v512i1'
+; THROUGHPUT-VL128B-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> %v)
+; THROUGHPUT-VL128B-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
+;
+; THROUGHPUT-VL256B-LABEL: 'vreduce_and_v512i1'
+; THROUGHPUT-VL256B-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> %v)
+; THROUGHPUT-VL256B-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
+;
+; THROUGHPUT-VL512B-LABEL: 'vreduce_and_v512i1'
+; THROUGHPUT-VL512B-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> %v)
+; THROUGHPUT-VL512B-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
+;
+; THROUGHPUT-VL1024B-LABEL: 'vreduce_and_v512i1'
+; THROUGHPUT-VL1024B-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> %v)
+; THROUGHPUT-VL1024B-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
+;
+ %red = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_v1024i1(<1024 x i1> %v) {
+; THROUGHPUT-VL128B-LABEL: 'vreduce_and_v1024i1'
+; THROUGHPUT-VL128B-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %red = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> %v)
+; THROUGHPUT-VL128B-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
+;
+; THROUGHPUT-VL256B-LABEL: 'vreduce_and_v1024i1'
+; THROUGHPUT-VL256B-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> %v)
+; THROUGHPUT-VL256B-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
+;
+; THROUGHPUT-VL512B-LABEL: 'vreduce_and_v1024i1'
+; THROUGHPUT-VL512B-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> %v)
+; THROUGHPUT-VL512B-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
+;
+; THROUGHPUT-VL1024B-LABEL: 'vreduce_and_v1024i1'
+; THROUGHPUT-VL1024B-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> %v)
+; THROUGHPUT-VL1024B-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
+;
+ %red = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_nxv1i1(<vscale x 1 x i1> %v) {
+; THROUGHPUT-LABEL: 'vreduce_and_nxv1i1'
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv1i1(<vscale x 1 x i1> %v)
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
+;
+ %red = call i1 @llvm.vector.reduce.and.nxv1i1(<vscale x 1 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_nxv2i1(<vscale x 2 x i1> %v) {
+; THROUGHPUT-LABEL: 'vreduce_and_nxv2i1'
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv2i1(<vscale x 2 x i1> %v)
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
+;
+ %red = call i1 @llvm.vector.reduce.and.nxv2i1(<vscale x 2 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_nxv4i1(<vscale x 4 x i1> %v) {
+; THROUGHPUT-LABEL: 'vreduce_and_nxv4i1'
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv4i1(<vscale x 4 x i1> %v)
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
+;
+ %red = call i1 @llvm.vector.reduce.and.nxv4i1(<vscale x 4 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_nxv8i1(<vscale x 8 x i1> %v) {
+; THROUGHPUT-LABEL: 'vreduce_and_nxv8i1'
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv8i1(<vscale x 8 x i1> %v)
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
+;
+ %red = call i1 @llvm.vector.reduce.and.nxv8i1(<vscale x 8 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_nxv16i1(<vscale x 16 x i1> %v) {
+; THROUGHPUT-LABEL: 'vreduce_and_nxv16i1'
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv16i1(<vscale x 16 x i1> %v)
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
+;
+ %red = call i1 @llvm.vector.reduce.and.nxv16i1(<vscale x 16 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_nxv32i1(<vscale x 32 x i1> %v) {
+; THROUGHPUT-LABEL: 'vreduce_and_nxv32i1'
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1> %v)
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
+;
+ %red = call i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_nxv64i1(<vscale x 64 x i1> %v) {
+; THROUGHPUT-LABEL: 'vreduce_and_nxv64i1'
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1> %v)
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
+;
+ %red = call i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_nxv128i1(<vscale x 128 x i1> %v) {
+; THROUGHPUT-LABEL: 'vreduce_and_nxv128i1'
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv128i1(<vscale x 128 x i1> %v)
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
+;
+ %red = call i1 @llvm.vector.reduce.and.nxv128i1(<vscale x 128 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_nxv256i1(<vscale x 256 x i1> %v) {
+; THROUGHPUT-LABEL: 'vreduce_and_nxv256i1'
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv256i1(<vscale x 256 x i1> %v)
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
+;
+ %red = call i1 @llvm.vector.reduce.and.nxv256i1(<vscale x 256 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_nxv512i1(<vscale x 512 x i1> %v) {
+; THROUGHPUT-LABEL: 'vreduce_and_nxv512i1'
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv512i1(<vscale x 512 x i1> %v)
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
+;
+ %red = call i1 @llvm.vector.reduce.and.nxv512i1(<vscale x 512 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_nxv1024i1(<vscale x 1024 x i1> %v) {
+; THROUGHPUT-LABEL: 'vreduce_and_nxv1024i1'
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv1024i1(<vscale x 1024 x i1> %v)
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
+;
+ %red = call i1 @llvm.vector.reduce.and.nxv1024i1(<vscale x 1024 x i1> %v)
+ ret i1 %red
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/reduce-and-i1.ll b/llvm/test/CodeGen/RISCV/rvv/reduce-and-i1.ll
new file mode 100644
index 000000000000000..6f6551bad8968f1
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/reduce-and-i1.ll
@@ -0,0 +1,548 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VL128B
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VL128B
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zvl256b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VL256B
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zvl256b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VL256B
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zvl512b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VL512B
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zvl512b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VL512B
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zvl1024b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VL1024B
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zvl1024b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VL1024B
+
+define zeroext i1 @vreduce_and_v1i1(<1 x i1> %v) {
+; CHECK-LABEL: vreduce_and_v1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vfirst.m a0, v0
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_v2i1(<2 x i1> %v) {
+; CHECK-LABEL: vreduce_and_v2i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vmnot.m v8, v0
+; CHECK-NEXT: vcpop.m a0, v8
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_v4i1(<4 x i1> %v) {
+; CHECK-VL128B-LABEL: vreduce_and_v4i1:
+; CHECK-VL128B: # %bb.0:
+; CHECK-VL128B-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-VL128B-NEXT: vmnot.m v8, v0
+; CHECK-VL128B-NEXT: vcpop.m a0, v8
+; CHECK-VL128B-NEXT: seqz a0, a0
+; CHECK-VL128B-NEXT: ret
+;
+; CHECK-VL256B-LABEL: vreduce_and_v4i1:
+; CHECK-VL256B: # %bb.0:
+; CHECK-VL256B-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
+; CHECK-VL256B-NEXT: vmnot.m v8, v0
+; CHECK-VL256B-NEXT: vcpop.m a0, v8
+; CHECK-VL256B-NEXT: seqz a0, a0
+; CHECK-VL256B-NEXT: ret
+;
+; CHECK-VL512B-LABEL: vreduce_and_v4i1:
+; CHECK-VL512B: # %bb.0:
+; CHECK-VL512B-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
+; CHECK-VL512B-NEXT: vmnot.m v8, v0
+; CHECK-VL512B-NEXT: vcpop.m a0, v8
+; CHECK-VL512B-NEXT: seqz a0, a0
+; CHECK-VL512B-NEXT: ret
+;
+; CHECK-VL1024B-LABEL: vreduce_and_v4i1:
+; CHECK-VL1024B: # %bb.0:
+; CHECK-VL1024B-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
+; CHECK-VL1024B-NEXT: vmnot.m v8, v0
+; CHECK-VL1024B-NEXT: vcpop.m a0, v8
+; CHECK-VL1024B-NEXT: seqz a0, a0
+; CHECK-VL1024B-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_v8i1(<8 x i1> %v) {
+; CHECK-VL128B-LABEL: vreduce_and_v8i1:
+; CHECK-VL128B: # %bb.0:
+; CHECK-VL128B-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-VL128B-NEXT: vmnot.m v8, v0
+; CHECK-VL128B-NEXT: vcpop.m a0, v8
+; CHECK-VL128B-NEXT: seqz a0, a0
+; CHECK-VL128B-NEXT: ret
+;
+; CHECK-VL256B-LABEL: vreduce_and_v8i1:
+; CHECK-VL256B: # %bb.0:
+; CHECK-VL256B-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
+; CHECK-VL256B-NEXT: vmnot.m v8, v0
+; CHECK-VL256B-NEXT: vcpop.m a0, v8
+; CHECK-VL256B-NEXT: seqz a0, a0
+; CHECK-VL256B-NEXT: ret
+;
+; CHECK-VL512B-LABEL: vreduce_and_v8i1:
+; CHECK-VL512B: # %bb.0:
+; CHECK-VL512B-NEXT: vsetivli zero, 8, e8, mf8, ta, ma
+; CHECK-VL512B-NEXT: vmnot.m v8, v0
+; CHECK-VL512B-NEXT: vcpop.m a0, v8
+; CHECK-VL512B-NEXT: seqz a0, a0
+; CHECK-VL512B-NEXT: ret
+;
+; CHECK-VL1024B-LABEL: vreduce_and_v8i1:
+; CHECK-VL1024B: # %bb.0:
+; CHECK-VL1024B-NEXT: vsetivli zero, 8, e8, mf8, ta, ma
+; CHECK-VL1024B-NEXT: vmnot.m v8, v0
+; CHECK-VL1024B-NEXT: vcpop.m a0, v8
+; CHECK-VL1024B-NEXT: seqz a0, a0
+; CHECK-VL1024B-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_v16i1(<16 x i1> %v) {
+; CHECK-VL128B-LABEL: vreduce_and_v16i1:
+; CHECK-VL128B: # %bb.0:
+; CHECK-VL128B-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-VL128B-NEXT: vmnot.m v8, v0
+; CHECK-VL128B-NEXT: vcpop.m a0, v8
+; CHECK-VL128B-NEXT: seqz a0, a0
+; CHECK-VL128B-NEXT: ret
+;
+; CHECK-VL256B-LABEL: vreduce_and_v16i1:
+; CHECK-VL256B: # %bb.0:
+; CHECK-VL256B-NEXT: vsetivli zero, 16, e8, mf2, ta, ma
+; CHECK-VL256B-NEXT: vmnot.m v8, v0
+; CHECK-VL256B-NEXT: vcpop.m a0, v8
+; CHECK-VL256B-NEXT: seqz a0, a0
+; CHECK-VL256B-NEXT: ret
+;
+; CHECK-VL512B-LABEL: vreduce_and_v16i1:
+; CHECK-VL512B: # %bb.0:
+; CHECK-VL512B-NEXT: vsetivli zero, 16, e8, mf4, ta, ma
+; CHECK-VL512B-NEXT: vmnot.m v8, v0
+; CHECK-VL512B-NEXT: vcpop.m a0, v8
+; CHECK-VL512B-NEXT: seqz a0, a0
+; CHECK-VL512B-NEXT: ret
+;
+; CHECK-VL1024B-LABEL: vreduce_and_v16i1:
+; CHECK-VL1024B: # %bb.0:
+; CHECK-VL1024B-NEXT: vsetivli zero, 16, e8, mf8, ta, ma
+; CHECK-VL1024B-NEXT: vmnot.m v8, v0
+; CHECK-VL1024B-NEXT: vcpop.m a0, v8
+; CHECK-VL1024B-NEXT: seqz a0, a0
+; CHECK-VL1024B-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_v32i1(<32 x i1> %v) {
+; CHECK-VL128B-LABEL: vreduce_and_v32i1:
+; CHECK-VL128B: # %bb.0:
+; CHECK-VL128B-NEXT: li a0, 32
+; CHECK-VL128B-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-VL128B-NEXT: vmnot.m v8, v0
+; CHECK-VL128B-NEXT: vcpop.m a0, v8
+; CHECK-VL128B-NEXT: seqz a0, a0
+; CHECK-VL128B-NEXT: ret
+;
+; CHECK-VL256B-LABEL: vreduce_and_v32i1:
+; CHECK-VL256B: # %bb.0:
+; CHECK-VL256B-NEXT: li a0, 32
+; CHECK-VL256B-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-VL256B-NEXT: vmnot.m v8, v0
+; CHECK-VL256B-NEXT: vcpop.m a0, v8
+; CHECK-VL256B-NEXT: seqz a0, a0
+; CHECK-VL256B-NEXT: ret
+;
+; CHECK-VL512B-LABEL: vreduce_and_v32i1:
+; CHECK-VL512B: # %bb.0:
+; CHECK-VL512B-NEXT: li a0, 32
+; CHECK-VL512B-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-VL512B-NEXT: vmnot.m v8, v0
+; CHECK-VL512B-NEXT: vcpop.m a0, v8
+; CHECK-VL512B-NEXT: seqz a0, a0
+; CHECK-VL512B-NEXT: ret
+;
+; CHECK-VL1024B-LABEL: vreduce_and_v32i1:
+; CHECK-VL1024B: # %bb.0:
+; CHECK-VL1024B-NEXT: li a0, 32
+; CHECK-VL1024B-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-VL1024B-NEXT: vmnot.m v8, v0
+; CHECK-VL1024B-NEXT: vcpop.m a0, v8
+; CHECK-VL1024B-NEXT: seqz a0, a0
+; CHECK-VL1024B-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_v64i1(<64 x i1> %v) {
+; CHECK-VL128B-LABEL: vreduce_and_v64i1:
+; CHECK-VL128B: # %bb.0:
+; CHECK-VL128B-NEXT: li a0, 64
+; CHECK-VL128B-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-VL128B-NEXT: vmnot.m v8, v0
+; CHECK-VL128B-NEXT: vcpop.m a0, v8
+; CHECK-VL128B-NEXT: seqz a0, a0
+; CHECK-VL128B-NEXT: ret
+;
+; CHECK-VL256B-LABEL: vreduce_and_v64i1:
+; CHECK-VL256B: # %bb.0:
+; CHECK-VL256B-NEXT: li a0, 64
+; CHECK-VL256B-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-VL256B-NEXT: vmnot.m v8, v0
+; CHECK-VL256B-NEXT: vcpop.m a0, v8
+; CHECK-VL256B-NEXT: seqz a0, a0
+; CHECK-VL256B-NEXT: ret
+;
+; CHECK-VL512B-LABEL: vreduce_and_v64i1:
+; CHECK-VL512B: # %bb.0:
+; CHECK-VL512B-NEXT: li a0, 64
+; CHECK-VL512B-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-VL512B-NEXT: vmnot.m v8, v0
+; CHECK-VL512B-NEXT: vcpop.m a0, v8
+; CHECK-VL512B-NEXT: seqz a0, a0
+; CHECK-VL512B-NEXT: ret
+;
+; CHECK-VL1024B-LABEL: vreduce_and_v64i1:
+; CHECK-VL1024B: # %bb.0:
+; CHECK-VL1024B-NEXT: li a0, 64
+; CHECK-VL1024B-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-VL1024B-NEXT: vmnot.m v8, v0
+; CHECK-VL1024B-NEXT: vcpop.m a0, v8
+; CHECK-VL1024B-NEXT: seqz a0, a0
+; CHECK-VL1024B-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_v128i1(<128 x i1> %v) {
+; CHECK-VL128B-LABEL: vreduce_and_v128i1:
+; CHECK-VL128B: # %bb.0:
+; CHECK-VL128B-NEXT: li a0, 128
+; CHECK-VL128B-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-VL128B-NEXT: vmnot.m v8, v0
+; CHECK-VL128B-NEXT: vcpop.m a0, v8
+; CHECK-VL128B-NEXT: seqz a0, a0
+; CHECK-VL128B-NEXT: ret
+;
+; CHECK-VL256B-LABEL: vreduce_and_v128i1:
+; CHECK-VL256B: # %bb.0:
+; CHECK-VL256B-NEXT: li a0, 128
+; CHECK-VL256B-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-VL256B-NEXT: vmnot.m v8, v0
+; CHECK-VL256B-NEXT: vcpop.m a0, v8
+; CHECK-VL256B-NEXT: seqz a0, a0
+; CHECK-VL256B-NEXT: ret
+;
+; CHECK-VL512B-LABEL: vreduce_and_v128i1:
+; CHECK-VL512B: # %bb.0:
+; CHECK-VL512B-NEXT: li a0, 128
+; CHECK-VL512B-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-VL512B-NEXT: vmnot.m v8, v0
+; CHECK-VL512B-NEXT: vcpop.m a0, v8
+; CHECK-VL512B-NEXT: seqz a0, a0
+; CHECK-VL512B-NEXT: ret
+;
+; CHECK-VL1024B-LABEL: vreduce_and_v128i1:
+; CHECK-VL1024B: # %bb.0:
+; CHECK-VL1024B-NEXT: li a0, 128
+; CHECK-VL1024B-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-VL1024B-NEXT: vmnot.m v8, v0
+; CHECK-VL1024B-NEXT: vcpop.m a0, v8
+; CHECK-VL1024B-NEXT: seqz a0, a0
+; CHECK-VL1024B-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_v256i1(<256 x i1> %v) {
+; CHECK-VL128B-LABEL: vreduce_and_v256i1:
+; CHECK-VL128B: # %bb.0:
+; CHECK-VL128B-NEXT: li a0, 128
+; CHECK-VL128B-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-VL128B-NEXT: vmnand.mm v8, v0, v8
+; CHECK-VL128B-NEXT: vcpop.m a0, v8
+; CHECK-VL128B-NEXT: seqz a0, a0
+; CHECK-VL128B-NEXT: ret
+;
+; CHECK-VL256B-LABEL: vreduce_and_v256i1:
+; CHECK-VL256B: # %bb.0:
+; CHECK-VL256B-NEXT: li a0, 256
+; CHECK-VL256B-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-VL256B-NEXT: vmnot.m v8, v0
+; CHECK-VL256B-NEXT: vcpop.m a0, v8
+; CHECK-VL256B-NEXT: seqz a0, a0
+; CHECK-VL256B-NEXT: ret
+;
+; CHECK-VL512B-LABEL: vreduce_and_v256i1:
+; CHECK-VL512B: # %bb.0:
+; CHECK-VL512B-NEXT: li a0, 256
+; CHECK-VL512B-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-VL512B-NEXT: vmnot.m v8, v0
+; CHECK-VL512B-NEXT: vcpop.m a0, v8
+; CHECK-VL512B-NEXT: seqz a0, a0
+; CHECK-VL512B-NEXT: ret
+;
+; CHECK-VL1024B-LABEL: vreduce_and_v256i1:
+; CHECK-VL1024B: # %bb.0:
+; CHECK-VL1024B-NEXT: li a0, 256
+; CHECK-VL1024B-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-VL1024B-NEXT: vmnot.m v8, v0
+; CHECK-VL1024B-NEXT: vcpop.m a0, v8
+; CHECK-VL1024B-NEXT: seqz a0, a0
+; CHECK-VL1024B-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_v512i1(<512 x i1> %v) {
+; CHECK-VL128B-LABEL: vreduce_and_v512i1:
+; CHECK-VL128B: # %bb.0:
+; CHECK-VL128B-NEXT: li a0, 128
+; CHECK-VL128B-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-VL128B-NEXT: vmand.mm v8, v8, v10
+; CHECK-VL128B-NEXT: vmand.mm v9, v0, v9
+; CHECK-VL128B-NEXT: vmnand.mm v8, v9, v8
+; CHECK-VL128B-NEXT: vcpop.m a0, v8
+; CHECK-VL128B-NEXT: seqz a0, a0
+; CHECK-VL128B-NEXT: ret
+;
+; CHECK-VL256B-LABEL: vreduce_and_v512i1:
+; CHECK-VL256B: # %bb.0:
+; CHECK-VL256B-NEXT: li a0, 256
+; CHECK-VL256B-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-VL256B-NEXT: vmnand.mm v8, v0, v8
+; CHECK-VL256B-NEXT: vcpop.m a0, v8
+; CHECK-VL256B-NEXT: seqz a0, a0
+; CHECK-VL256B-NEXT: ret
+;
+; CHECK-VL512B-LABEL: vreduce_and_v512i1:
+; CHECK-VL512B: # %bb.0:
+; CHECK-VL512B-NEXT: li a0, 512
+; CHECK-VL512B-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-VL512B-NEXT: vmnot.m v8, v0
+; CHECK-VL512B-NEXT: vcpop.m a0, v8
+; CHECK-VL512B-NEXT: seqz a0, a0
+; CHECK-VL512B-NEXT: ret
+;
+; CHECK-VL1024B-LABEL: vreduce_and_v512i1:
+; CHECK-VL1024B: # %bb.0:
+; CHECK-VL1024B-NEXT: li a0, 512
+; CHECK-VL1024B-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-VL1024B-NEXT: vmnot.m v8, v0
+; CHECK-VL1024B-NEXT: vcpop.m a0, v8
+; CHECK-VL1024B-NEXT: seqz a0, a0
+; CHECK-VL1024B-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_v1024i1(<1024 x i1> %v) {
+; CHECK-VL128B-LABEL: vreduce_and_v1024i1:
+; CHECK-VL128B: # %bb.0:
+; CHECK-VL128B-NEXT: li a0, 128
+; CHECK-VL128B-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-VL128B-NEXT: vmand.mm v10, v10, v14
+; CHECK-VL128B-NEXT: vmand.mm v8, v8, v12
+; CHECK-VL128B-NEXT: vmand.mm v9, v9, v13
+; CHECK-VL128B-NEXT: vmand.mm v11, v0, v11
+; CHECK-VL128B-NEXT: vmand.mm v8, v8, v10
+; CHECK-VL128B-NEXT: vmand.mm v9, v11, v9
+; CHECK-VL128B-NEXT: vmnand.mm v8, v9, v8
+; CHECK-VL128B-NEXT: vcpop.m a0, v8
+; CHECK-VL128B-NEXT: seqz a0, a0
+; CHECK-VL128B-NEXT: ret
+;
+; CHECK-VL256B-LABEL: vreduce_and_v1024i1:
+; CHECK-VL256B: # %bb.0:
+; CHECK-VL256B-NEXT: li a0, 256
+; CHECK-VL256B-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-VL256B-NEXT: vmand.mm v8, v8, v10
+; CHECK-VL256B-NEXT: vmand.mm v9, v0, v9
+; CHECK-VL256B-NEXT: vmnand.mm v8, v9, v8
+; CHECK-VL256B-NEXT: vcpop.m a0, v8
+; CHECK-VL256B-NEXT: seqz a0, a0
+; CHECK-VL256B-NEXT: ret
+;
+; CHECK-VL512B-LABEL: vreduce_and_v1024i1:
+; CHECK-VL512B: # %bb.0:
+; CHECK-VL512B-NEXT: li a0, 512
+; CHECK-VL512B-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-VL512B-NEXT: vmnand.mm v8, v0, v8
+; CHECK-VL512B-NEXT: vcpop.m a0, v8
+; CHECK-VL512B-NEXT: seqz a0, a0
+; CHECK-VL512B-NEXT: ret
+;
+; CHECK-VL1024B-LABEL: vreduce_and_v1024i1:
+; CHECK-VL1024B: # %bb.0:
+; CHECK-VL1024B-NEXT: li a0, 1024
+; CHECK-VL1024B-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-VL1024B-NEXT: vmnot.m v8, v0
+; CHECK-VL1024B-NEXT: vcpop.m a0, v8
+; CHECK-VL1024B-NEXT: seqz a0, a0
+; CHECK-VL1024B-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_nxv1i1(<vscale x 1 x i1> %v) {
+; CHECK-LABEL: vreduce_and_nxv1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vmnot.m v8, v0
+; CHECK-NEXT: vcpop.m a0, v8
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.nxv1i1(<vscale x 1 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_nxv2i1(<vscale x 2 x i1> %v) {
+; CHECK-LABEL: vreduce_and_nxv2i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vmnot.m v8, v0
+; CHECK-NEXT: vcpop.m a0, v8
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.nxv2i1(<vscale x 2 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_nxv4i1(<vscale x 4 x i1> %v) {
+; CHECK-LABEL: vreduce_and_nxv4i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vmnot.m v8, v0
+; CHECK-NEXT: vcpop.m a0, v8
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.nxv4i1(<vscale x 4 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_nxv8i1(<vscale x 8 x i1> %v) {
+; CHECK-LABEL: vreduce_and_nxv8i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vmnot.m v8, v0
+; CHECK-NEXT: vcpop.m a0, v8
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.nxv8i1(<vscale x 8 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_nxv16i1(<vscale x 16 x i1> %v) {
+; CHECK-LABEL: vreduce_and_nxv16i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; CHECK-NEXT: vmnot.m v8, v0
+; CHECK-NEXT: vcpop.m a0, v8
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.nxv16i1(<vscale x 16 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_nxv32i1(<vscale x 32 x i1> %v) {
+; CHECK-LABEL: vreduce_and_nxv32i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; CHECK-NEXT: vmnot.m v8, v0
+; CHECK-NEXT: vcpop.m a0, v8
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_nxv64i1(<vscale x 64 x i1> %v) {
+; CHECK-LABEL: vreduce_and_nxv64i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT: vmnot.m v8, v0
+; CHECK-NEXT: vcpop.m a0, v8
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_nxv128i1(<vscale x 128 x i1> %v) {
+; CHECK-LABEL: vreduce_and_nxv128i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT: vmand.mm v8, v0, v8
+; CHECK-NEXT: vmnot.m v8, v8
+; CHECK-NEXT: vcpop.m a0, v8
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.nxv128i1(<vscale x 128 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_nxv256i1(<vscale x 256 x i1> %v) {
+; CHECK-LABEL: vreduce_and_nxv256i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT: vmand.mm v8, v8, v10
+; CHECK-NEXT: vmand.mm v9, v0, v9
+; CHECK-NEXT: vmand.mm v8, v9, v8
+; CHECK-NEXT: vmnot.m v8, v8
+; CHECK-NEXT: vcpop.m a0, v8
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.nxv256i1(<vscale x 256 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_nxv512i1(<vscale x 512 x i1> %v) {
+; CHECK-LABEL: vreduce_and_nxv512i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT: vmand.mm v10, v10, v14
+; CHECK-NEXT: vmand.mm v8, v8, v12
+; CHECK-NEXT: vmand.mm v9, v9, v13
+; CHECK-NEXT: vmand.mm v11, v0, v11
+; CHECK-NEXT: vmand.mm v8, v8, v10
+; CHECK-NEXT: vmand.mm v9, v11, v9
+; CHECK-NEXT: vmand.mm v8, v9, v8
+; CHECK-NEXT: vmnot.m v8, v8
+; CHECK-NEXT: vcpop.m a0, v8
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.nxv512i1(<vscale x 512 x i1> %v)
+ ret i1 %red
+}
+
+define zeroext i1 @vreduce_and_nxv1024i1(<vscale x 1024 x i1> %v) {
+; CHECK-LABEL: vreduce_and_nxv1024i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT: vmand.mm v14, v14, v22
+; CHECK-NEXT: vmand.mm v10, v10, v18
+; CHECK-NEXT: vmand.mm v12, v12, v20
+; CHECK-NEXT: vmand.mm v8, v8, v16
+; CHECK-NEXT: vmand.mm v13, v13, v21
+; CHECK-NEXT: vmand.mm v9, v9, v17
+; CHECK-NEXT: vmand.mm v11, v11, v19
+; CHECK-NEXT: vmand.mm v15, v0, v15
+; CHECK-NEXT: vmand.mm v10, v10, v14
+; CHECK-NEXT: vmand.mm v8, v8, v12
+; CHECK-NEXT: vmand.mm v9, v9, v13
+; CHECK-NEXT: vmand.mm v11, v15, v11
+; CHECK-NEXT: vmand.mm v8, v8, v10
+; CHECK-NEXT: vmand.mm v9, v11, v9
+; CHECK-NEXT: vmand.mm v8, v9, v8
+; CHECK-NEXT: vmnot.m v8, v8
+; CHECK-NEXT: vcpop.m a0, v8
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.nxv1024i1(<vscale x 1024 x i1> %v)
+ ret i1 %red
+}
>From 493f56c65f167dfb790d923f8064925d7f44d59b Mon Sep 17 00:00:00 2001
From: Shao-Ce SUN <sunshaoce at outlook.com>
Date: Tue, 7 Jan 2025 14:59:12 +0800
Subject: [PATCH 3/3] fixup! Combine formulas of NumOfVMAND
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 14 +++++---------
.../Analysis/CostModel/RISCV/reduce-and-i1.ll | 8 ++++----
llvm/test/Analysis/CostModel/RISCV/reduce-and.ll | 16 ++++++++--------
llvm/test/CodeGen/RISCV/rvv/reduce-and-i1.ll | 12 ++++--------
4 files changed, 21 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index cf13f5423de88a2..cd2674841f835da 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1551,17 +1551,13 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
// vcpop.m a0, v8
// seqz a0, a0
- // Scalable VT: In nxv128i1 and larger vector elements,
+ // Scalable VT: For nxv256i1 and larger vector types,
// Fixed VT: If getFixedSizeInBits() >= (4 * getRealMinVLen()),
- // the VMAND_MM instructions have started to be added.
+ // additional VMAND_MM instructions are emitted.
InstructionCost NumOfVMAND = 0;
- if (LT.second.isScalableVector()) {
- NumOfVMAND = (LT.first >= 2) ? (LT.first - 1) : 0;
- } else {
- bool IsOverflow =
- LT.second.getFixedSizeInBits() == ST->getRealMinVLen();
- NumOfVMAND = (IsOverflow && LT.first > 2) ? (LT.first - 2) : 0;
- }
+ if (LT.second.isScalableVector() ||
+ LT.second.getFixedSizeInBits() == ST->getRealMinVLen())
+ NumOfVMAND = (LT.first > 2) ? (LT.first - 2) : 0;
return NumOfVMAND *
getRISCVInstructionCost(RISCV::VMAND_MM, LT.second, CostKind) +
getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second, CostKind) +
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-and-i1.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-and-i1.ll
index 99a23743674bf94..cc88d907187d141 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-and-i1.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-and-i1.ll
@@ -204,7 +204,7 @@ define zeroext i1 @vreduce_and_nxv64i1(<vscale x 64 x i1> %v) {
define zeroext i1 @vreduce_and_nxv128i1(<vscale x 128 x i1> %v) {
; THROUGHPUT-LABEL: 'vreduce_and_nxv128i1'
-; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv128i1(<vscale x 128 x i1> %v)
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv128i1(<vscale x 128 x i1> %v)
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
%red = call i1 @llvm.vector.reduce.and.nxv128i1(<vscale x 128 x i1> %v)
@@ -213,7 +213,7 @@ define zeroext i1 @vreduce_and_nxv128i1(<vscale x 128 x i1> %v) {
define zeroext i1 @vreduce_and_nxv256i1(<vscale x 256 x i1> %v) {
; THROUGHPUT-LABEL: 'vreduce_and_nxv256i1'
-; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv256i1(<vscale x 256 x i1> %v)
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv256i1(<vscale x 256 x i1> %v)
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
%red = call i1 @llvm.vector.reduce.and.nxv256i1(<vscale x 256 x i1> %v)
@@ -222,7 +222,7 @@ define zeroext i1 @vreduce_and_nxv256i1(<vscale x 256 x i1> %v) {
define zeroext i1 @vreduce_and_nxv512i1(<vscale x 512 x i1> %v) {
; THROUGHPUT-LABEL: 'vreduce_and_nxv512i1'
-; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv512i1(<vscale x 512 x i1> %v)
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv512i1(<vscale x 512 x i1> %v)
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
%red = call i1 @llvm.vector.reduce.and.nxv512i1(<vscale x 512 x i1> %v)
@@ -231,7 +231,7 @@ define zeroext i1 @vreduce_and_nxv512i1(<vscale x 512 x i1> %v) {
define zeroext i1 @vreduce_and_nxv1024i1(<vscale x 1024 x i1> %v) {
; THROUGHPUT-LABEL: 'vreduce_and_nxv1024i1'
-; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv1024i1(<vscale x 1024 x i1> %v)
+; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv1024i1(<vscale x 1024 x i1> %v)
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
%red = call i1 @llvm.vector.reduce.and.nxv1024i1(<vscale x 1024 x i1> %v)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
index 7b626c426fc226d..dc6a582df133bc7 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
@@ -24,10 +24,10 @@ define i32 @reduce_i1(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call i1 @llvm.vector.reduce.and.nxv16i1(<vscale x 16 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV32 = call i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV64 = call i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV128 = call i1 @llvm.vector.reduce.and.nxv128i1(<vscale x 128 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV256 = call i1 @llvm.vector.reduce.and.nxv256i1(<vscale x 256 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV512 = call i1 @llvm.vector.reduce.and.nxv512i1(<vscale x 512 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV1024 = call i1 @llvm.vector.reduce.and.nxv1024i1(<vscale x 1024 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV128 = call i1 @llvm.vector.reduce.and.nxv128i1(<vscale x 128 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV256 = call i1 @llvm.vector.reduce.and.nxv256i1(<vscale x 256 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV512 = call i1 @llvm.vector.reduce.and.nxv512i1(<vscale x 512 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %NXV1024 = call i1 @llvm.vector.reduce.and.nxv1024i1(<vscale x 1024 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i1'
@@ -49,10 +49,10 @@ define i32 @reduce_i1(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call i1 @llvm.vector.reduce.and.nxv16i1(<vscale x 16 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV32 = call i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV64 = call i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV128 = call i1 @llvm.vector.reduce.and.nxv128i1(<vscale x 128 x i1> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV256 = call i1 @llvm.vector.reduce.and.nxv256i1(<vscale x 256 x i1> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV512 = call i1 @llvm.vector.reduce.and.nxv512i1(<vscale x 512 x i1> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV1024 = call i1 @llvm.vector.reduce.and.nxv1024i1(<vscale x 1024 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV128 = call i1 @llvm.vector.reduce.and.nxv128i1(<vscale x 128 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV256 = call i1 @llvm.vector.reduce.and.nxv256i1(<vscale x 256 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV512 = call i1 @llvm.vector.reduce.and.nxv512i1(<vscale x 512 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %NXV1024 = call i1 @llvm.vector.reduce.and.nxv1024i1(<vscale x 1024 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
diff --git a/llvm/test/CodeGen/RISCV/rvv/reduce-and-i1.ll b/llvm/test/CodeGen/RISCV/rvv/reduce-and-i1.ll
index 6f6551bad8968f1..61a6a601d28bb7d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/reduce-and-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/reduce-and-i1.ll
@@ -477,8 +477,7 @@ define zeroext i1 @vreduce_and_nxv128i1(<vscale x 128 x i1> %v) {
; CHECK-LABEL: vreduce_and_nxv128i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
-; CHECK-NEXT: vmand.mm v8, v0, v8
-; CHECK-NEXT: vmnot.m v8, v8
+; CHECK-NEXT: vmnand.mm v8, v0, v8
; CHECK-NEXT: vcpop.m a0, v8
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
@@ -492,8 +491,7 @@ define zeroext i1 @vreduce_and_nxv256i1(<vscale x 256 x i1> %v) {
; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
; CHECK-NEXT: vmand.mm v8, v8, v10
; CHECK-NEXT: vmand.mm v9, v0, v9
-; CHECK-NEXT: vmand.mm v8, v9, v8
-; CHECK-NEXT: vmnot.m v8, v8
+; CHECK-NEXT: vmnand.mm v8, v9, v8
; CHECK-NEXT: vcpop.m a0, v8
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
@@ -511,8 +509,7 @@ define zeroext i1 @vreduce_and_nxv512i1(<vscale x 512 x i1> %v) {
; CHECK-NEXT: vmand.mm v11, v0, v11
; CHECK-NEXT: vmand.mm v8, v8, v10
; CHECK-NEXT: vmand.mm v9, v11, v9
-; CHECK-NEXT: vmand.mm v8, v9, v8
-; CHECK-NEXT: vmnot.m v8, v8
+; CHECK-NEXT: vmnand.mm v8, v9, v8
; CHECK-NEXT: vcpop.m a0, v8
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
@@ -538,8 +535,7 @@ define zeroext i1 @vreduce_and_nxv1024i1(<vscale x 1024 x i1> %v) {
; CHECK-NEXT: vmand.mm v11, v15, v11
; CHECK-NEXT: vmand.mm v8, v8, v10
; CHECK-NEXT: vmand.mm v9, v11, v9
-; CHECK-NEXT: vmand.mm v8, v9, v8
-; CHECK-NEXT: vmnot.m v8, v8
+; CHECK-NEXT: vmnand.mm v8, v9, v8
; CHECK-NEXT: vcpop.m a0, v8
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ret
More information about the llvm-commits
mailing list