[llvm] [RISCV] Fix the cost of `llvm.vector.reduce.and` (PR #119160)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Dec 8 19:07:14 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-analysis
Author: Shao-Ce SUN (sunshaoce)
<details>
<summary>Changes</summary>
I added some reduction-related CodeGen test cases. For consistency, I also added cases for other reduction intrinsics such as `llvm.vector.reduce.or`.
For the `v1i1` case, where the reduction generates `VFIRST`, please refer to: https://reviews.llvm.org/D139512.
---
Patch is 48.98 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/119160.diff
6 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (+16-2)
- (modified) llvm/test/Analysis/CostModel/RISCV/reduce-and.ll (+42-8)
- (modified) llvm/test/Analysis/CostModel/RISCV/reduce-max.ll (+2-2)
- (modified) llvm/test/Analysis/CostModel/RISCV/reduce-min.ll (+2-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll (+472)
- (modified) llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll (+558)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index cbf8c57fde44d1..4b5f5b0c8e047d 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1538,13 +1538,27 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
if (ElementTy->isIntegerTy(1)) {
if (ISD == ISD::AND) {
// Example sequences:
+ // vfirst.m a0, v0
+ // seqz a0, a0
+ if (LT.second == MVT::v1i1)
+ return getRISCVInstructionCost(RISCV::VFIRST_M, LT.second, CostKind) +
+ getCmpSelInstrCost(Instruction::ICmp, ElementTy, ElementTy,
+ CmpInst::ICMP_EQ, CostKind);
+ // Example sequences:
// vsetvli a0, zero, e8, mf8, ta, ma
// vmand.mm v8, v9, v8 ; needed every time type is split
// vmnot.m v8, v0
// vcpop.m a0, v8
// seqz a0, a0
- return LT.first * getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second,
- CostKind) +
+
+ // Fixed VT: In v512i1 and larger vector elements,
+ // Scalable VT: In v128i1 and larger vector elements,
+ // the VMAND_MM instructions have started to be added.
+ return ((LT.first >= 2)
+ ? LT.first - (LT.second.isScalableVector() ? 1 : 2)
+ : 0) *
+ getRISCVInstructionCost(RISCV::VMAND_MM, LT.second, CostKind) +
+ getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second, CostKind) +
getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
getCmpSelInstrCost(Instruction::ICmp, ElementTy, ElementTy,
CmpInst::ICMP_EQ, CostKind);
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
index e4f1cf8ff418a8..7b626c426fc226 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
@@ -6,7 +6,7 @@
define i32 @reduce_i1(i32 %arg) {
; CHECK-LABEL: 'reduce_i1'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
@@ -14,13 +14,24 @@ define i32 @reduce_i1(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call i1 @llvm.vector.reduce.and.nxv1i1(<vscale x 1 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call i1 @llvm.vector.reduce.and.nxv2i1(<vscale x 2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call i1 @llvm.vector.reduce.and.nxv4i1(<vscale x 4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call i1 @llvm.vector.reduce.and.nxv8i1(<vscale x 8 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call i1 @llvm.vector.reduce.and.nxv16i1(<vscale x 16 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV32 = call i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV64 = call i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV128 = call i1 @llvm.vector.reduce.and.nxv128i1(<vscale x 128 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV256 = call i1 @llvm.vector.reduce.and.nxv256i1(<vscale x 256 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV512 = call i1 @llvm.vector.reduce.and.nxv512i1(<vscale x 512 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV1024 = call i1 @llvm.vector.reduce.and.nxv1024i1(<vscale x 1024 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i1'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
@@ -28,9 +39,20 @@ define i32 @reduce_i1(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call i1 @llvm.vector.reduce.and.nxv1i1(<vscale x 1 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call i1 @llvm.vector.reduce.and.nxv2i1(<vscale x 2 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call i1 @llvm.vector.reduce.and.nxv4i1(<vscale x 4 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call i1 @llvm.vector.reduce.and.nxv8i1(<vscale x 8 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call i1 @llvm.vector.reduce.and.nxv16i1(<vscale x 16 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV32 = call i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV64 = call i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV128 = call i1 @llvm.vector.reduce.and.nxv128i1(<vscale x 128 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV256 = call i1 @llvm.vector.reduce.and.nxv256i1(<vscale x 256 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV512 = call i1 @llvm.vector.reduce.and.nxv512i1(<vscale x 512 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV1024 = call i1 @llvm.vector.reduce.and.nxv1024i1(<vscale x 1024 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
@@ -44,6 +66,18 @@ define i32 @reduce_i1(i32 %arg) {
%V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
%V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
%V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
+
+ %NXV1 = call i1 @llvm.vector.reduce.and.nxv1i1(<vscale x 1 x i1> undef)
+ %NXV2 = call i1 @llvm.vector.reduce.and.nxv2i1(<vscale x 2 x i1> undef)
+ %NXV4 = call i1 @llvm.vector.reduce.and.nxv4i1(<vscale x 4 x i1> undef)
+ %NXV8 = call i1 @llvm.vector.reduce.and.nxv8i1(<vscale x 8 x i1> undef)
+ %NXV16 = call i1 @llvm.vector.reduce.and.nxv16i1(<vscale x 16 x i1> undef)
+ %NXV32 = call i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1> undef)
+ %NXV64 = call i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1> undef)
+ %NXV128 = call i1 @llvm.vector.reduce.and.nxv128i1(<vscale x 128 x i1> undef)
+ %NXV256 = call i1 @llvm.vector.reduce.and.nxv256i1(<vscale x 256 x i1> undef)
+ %NXV512 = call i1 @llvm.vector.reduce.and.nxv512i1(<vscale x 512 x i1> undef)
+ %NXV1024 = call i1 @llvm.vector.reduce.and.nxv1024i1(<vscale x 1024 x i1> undef)
ret i32 undef
}
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
index f11e9f2b5ae837..5c9303af31747e 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
@@ -176,7 +176,7 @@ define i32 @reduce_umax_i64(i32 %arg) {
define i32 @reduce_smin_i1(i32 %arg) {
; CHECK-LABEL: 'reduce_smin_i1'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.smax.v2i1(<2 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.smax.v4i1(<4 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.smax.v8i1(<8 x i1> undef)
@@ -187,7 +187,7 @@ define i32 @reduce_smin_i1(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_smin_i1'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.smax.v2i1(<2 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.smax.v4i1(<4 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.smax.v8i1(<8 x i1> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
index 457fdbe46f73b5..9875d3e5858115 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
@@ -6,7 +6,7 @@
define i32 @reduce_umin_i1(i32 %arg) {
; CHECK-LABEL: 'reduce_umin_i1'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.umin.v2i1(<2 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.umin.v4i1(<4 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.umin.v8i1(<8 x i1> undef)
@@ -17,7 +17,7 @@ define i32 @reduce_umin_i1(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_umin_i1'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.umin.v2i1(<2 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.umin.v4i1(<4 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.umin.v8i1(<8 x i1> undef)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll
index 44d4a8a1e04cda..0d31ec5f784352 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll
@@ -763,3 +763,475 @@ define zeroext i1 @vreduce_add_v64i1(<64 x i1> %v) {
%red = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> %v)
ret i1 %red
}
+
+declare i1 @llvm.vector.reduce.or.v128i1(<128 x i1>)
+
+define zeroext i1 @vreduce_or_v128i1(<128 x i1> %v) {
+; CHECK-LABEL: vreduce_or_v128i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 128
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vcpop.m a0, v0
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.v128i1(<128 x i1>)
+
+define zeroext i1 @vreduce_xor_v128i1(<128 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_v128i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 128
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vcpop.m a0, v0
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.v128i1(<128 x i1>)
+
+define zeroext i1 @vreduce_and_v128i1(<128 x i1> %v) {
+; CHECK-LABEL: vreduce_and_v128i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 128
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vmnot.m v8, v0
+; CHECK-NEXT: vcpop.m a0, v8
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.umax.v128i1(<128 x i1>)
+
+define zeroext i1 @vreduce_umax_v128i1(<128 x i1> %v) {
+; CHECK-LABEL: vreduce_umax_v128i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 128
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vcpop.m a0, v0
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.umax.v128i1(<128 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.smax.v128i1(<128 x i1>)
+
+define zeroext i1 @vreduce_smax_v128i1(<128 x i1> %v) {
+; CHECK-LABEL: vreduce_smax_v128i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 128
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vmnot.m v8, v0
+; CHECK-NEXT: vcpop.m a0, v8
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.smax.v128i1(<128 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.umin.v128i1(<128 x i1>)
+
+define zeroext i1 @vreduce_umin_v128i1(<128 x i1> %v) {
+; CHECK-LABEL: vreduce_umin_v128i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 128
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vmnot.m v8, v0
+; CHECK-NEXT: vcpop.m a0, v8
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.umin.v128i1(<128 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.smin.v128i1(<128 x i1>)
+
+define zeroext i1 @vreduce_smin_v128i1(<128 x i1> %v) {
+; CHECK-LABEL: vreduce_smin_v128i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 128
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vcpop.m a0, v0
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.smin.v128i1(<128 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.v256i1(<256 x i1>)
+
+define zeroext i1 @vreduce_or_v256i1(<256 x i1> %v) {
+; CHECK-LABEL: vreduce_or_v256i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 128
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vmor.mm v8, v0, v8
+; CHECK-NEXT: vcpop.m a0, v8
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.v256i1(<256 x i1>)
+
+define zeroext i1 @vreduce_xor_v256i1(<256 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_v256i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 128
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vmxor.mm v8, v0, v8
+; CHECK-NEXT: vcpop.m a0, v8
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.xor.v256i1(<256 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.v256i1(<256 x i1>)
+
+define zeroext i1 @vreduce_and_v256i1(<256 x i1> %v) {
+; CHECK-LABEL: vreduce_and_v256i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 128
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vmnand.mm v8, v0, v8
+; CHECK-NEXT: vcpop.m a0, v8
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.umax.v256i1(<256 x i1>)
+
+define zeroext i1 @vreduce_umax_v256i1(<256 x i1> %v) {
+; CHECK-LABEL: vreduce_umax_v256i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 128
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vmor.mm v8, v0, v8
+; CHECK-NEXT: vcpop.m a0, v8
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.umax.v256i1(<256 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.smax.v256i1(<256 x i1>)
+
+define zeroext i1 @vreduce_smax_v256i1(<256 x i1> %v) {
+; CHECK-LABEL: vreduce_smax_v256i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 128
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vmnand.mm v8, v0, v8
+; CHECK-NEXT: vcpop.m a0, v8
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.smax.v256i1(<256 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.r...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/119160
More information about the llvm-commits mailing list