[llvm] [RISCV] Fix the cost of `llvm.vector.reduce.and` (PR #119160)

Shao-Ce SUN via llvm-commits llvm-commits at lists.llvm.org
Sun Dec 8 19:33:42 PST 2024


https://github.com/sunshaoce updated https://github.com/llvm/llvm-project/pull/119160

>From c22d02622a491319ecda8327d216f409d1e426f9 Mon Sep 17 00:00:00 2001
From: Shao-Ce SUN <sunshaoce at outlook.com>
Date: Sat, 7 Dec 2024 22:28:29 +0800
Subject: [PATCH 1/3] [RISCV] Fix the cost of llvm.vector.reduce.and

---
 .../Target/RISCV/RISCVTargetTransformInfo.cpp |  18 +-
 .../Analysis/CostModel/RISCV/reduce-and.ll    |  50 ++-
 .../Analysis/CostModel/RISCV/reduce-max.ll    |   4 +-
 .../Analysis/CostModel/RISCV/reduce-min.ll    |   4 +-
 llvm/test/CodeGen/RISCV/rvv/reduce-add.ll     | 307 ++++++++++++++++++
 5 files changed, 369 insertions(+), 14 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/reduce-add.ll

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index cbf8c57fde44d1..4b5f5b0c8e047d 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1538,13 +1538,27 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
   if (ElementTy->isIntegerTy(1)) {
     if (ISD == ISD::AND) {
       // Example sequences:
+      //   vfirst.m a0, v0
+      //   seqz a0, a0
+      if (LT.second == MVT::v1i1)
+        return getRISCVInstructionCost(RISCV::VFIRST_M, LT.second, CostKind) +
+               getCmpSelInstrCost(Instruction::ICmp, ElementTy, ElementTy,
+                                  CmpInst::ICMP_EQ, CostKind);
+      // Example sequences:
       //   vsetvli a0, zero, e8, mf8, ta, ma
       //   vmand.mm v8, v9, v8 ; needed every time type is split
       //   vmnot.m v8, v0
       //   vcpop.m a0, v8
       //   seqz a0, a0
-      return LT.first * getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second,
-                                                CostKind) +
+
+      // Fixed VT:    for v512i1 and larger element counts,
+      // Scalable VT: for nxv128i1 and larger element counts,
+      // extra VMAND_MM instructions are needed to combine the split parts.
+      return ((LT.first >= 2)
+                  ? LT.first - (LT.second.isScalableVector() ? 1 : 2)
+                  : 0) *
+                 getRISCVInstructionCost(RISCV::VMAND_MM, LT.second, CostKind) +
+             getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second, CostKind) +
              getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
              getCmpSelInstrCost(Instruction::ICmp, ElementTy, ElementTy,
                                 CmpInst::ICMP_EQ, CostKind);
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
index e4f1cf8ff418a8..7b626c426fc226 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
@@ -6,7 +6,7 @@
 
 define i32 @reduce_i1(i32 %arg) {
 ; CHECK-LABEL: 'reduce_i1'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
@@ -14,13 +14,24 @@ define i32 @reduce_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call i1 @llvm.vector.reduce.and.nxv1i1(<vscale x 1 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call i1 @llvm.vector.reduce.and.nxv2i1(<vscale x 2 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call i1 @llvm.vector.reduce.and.nxv4i1(<vscale x 4 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call i1 @llvm.vector.reduce.and.nxv8i1(<vscale x 8 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call i1 @llvm.vector.reduce.and.nxv16i1(<vscale x 16 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV32 = call i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV64 = call i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV128 = call i1 @llvm.vector.reduce.and.nxv128i1(<vscale x 128 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NXV256 = call i1 @llvm.vector.reduce.and.nxv256i1(<vscale x 256 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV512 = call i1 @llvm.vector.reduce.and.nxv512i1(<vscale x 512 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV1024 = call i1 @llvm.vector.reduce.and.nxv1024i1(<vscale x 1024 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i1'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
@@ -28,9 +39,20 @@ define i32 @reduce_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call i1 @llvm.vector.reduce.and.nxv1i1(<vscale x 1 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call i1 @llvm.vector.reduce.and.nxv2i1(<vscale x 2 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call i1 @llvm.vector.reduce.and.nxv4i1(<vscale x 4 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call i1 @llvm.vector.reduce.and.nxv8i1(<vscale x 8 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call i1 @llvm.vector.reduce.and.nxv16i1(<vscale x 16 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV32 = call i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV64 = call i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV128 = call i1 @llvm.vector.reduce.and.nxv128i1(<vscale x 128 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NXV256 = call i1 @llvm.vector.reduce.and.nxv256i1(<vscale x 256 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV512 = call i1 @llvm.vector.reduce.and.nxv512i1(<vscale x 512 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV1024 = call i1 @llvm.vector.reduce.and.nxv1024i1(<vscale x 1024 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
@@ -44,6 +66,18 @@ define i32 @reduce_i1(i32 %arg) {
   %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
   %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
   %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
+
+  %NXV1   = call i1 @llvm.vector.reduce.and.nxv1i1(<vscale x 1 x i1> undef)
+  %NXV2   = call i1 @llvm.vector.reduce.and.nxv2i1(<vscale x 2 x i1> undef)
+  %NXV4   = call i1 @llvm.vector.reduce.and.nxv4i1(<vscale x 4 x i1> undef)
+  %NXV8   = call i1 @llvm.vector.reduce.and.nxv8i1(<vscale x 8 x i1> undef)
+  %NXV16  = call i1 @llvm.vector.reduce.and.nxv16i1(<vscale x 16 x i1> undef)
+  %NXV32  = call i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1> undef)
+  %NXV64  = call i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1> undef)
+  %NXV128 = call i1 @llvm.vector.reduce.and.nxv128i1(<vscale x 128 x i1> undef)
+  %NXV256 = call i1 @llvm.vector.reduce.and.nxv256i1(<vscale x 256 x i1> undef)
+  %NXV512 = call i1 @llvm.vector.reduce.and.nxv512i1(<vscale x 512 x i1> undef)
+  %NXV1024 = call i1 @llvm.vector.reduce.and.nxv1024i1(<vscale x 1024 x i1> undef)
   ret i32 undef
 }
 
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
index f11e9f2b5ae837..5c9303af31747e 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
@@ -176,7 +176,7 @@ define i32 @reduce_umax_i64(i32 %arg) {
 
 define i32 @reduce_smin_i1(i32 %arg) {
 ; CHECK-LABEL: 'reduce_smin_i1'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.smax.v2i1(<2 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.smax.v4i1(<4 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.smax.v8i1(<8 x i1> undef)
@@ -187,7 +187,7 @@ define i32 @reduce_smin_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smin_i1'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.smax.v2i1(<2 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.smax.v4i1(<4 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.smax.v8i1(<8 x i1> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
index 457fdbe46f73b5..9875d3e5858115 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
@@ -6,7 +6,7 @@
 
 define i32 @reduce_umin_i1(i32 %arg) {
 ; CHECK-LABEL: 'reduce_umin_i1'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.umin.v2i1(<2 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.umin.v4i1(<4 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.umin.v8i1(<8 x i1> undef)
@@ -17,7 +17,7 @@ define i32 @reduce_umin_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umin_i1'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.umin.v2i1(<2 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.umin.v4i1(<4 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.umin.v8i1(<8 x i1> undef)
diff --git a/llvm/test/CodeGen/RISCV/rvv/reduce-add.ll b/llvm/test/CodeGen/RISCV/rvv/reduce-add.ll
new file mode 100644
index 00000000000000..9fef8a453b155e
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/reduce-add.ll
@@ -0,0 +1,307 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s
+
+define i1 @reduce_and_i1(<1 x i1> %a) {
+; CHECK-LABEL: reduce_and_i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vfirst.m a0, v0
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %a)
+  ret i1 %1
+}
+
+define i1 @reduce_and_i2(<2 x i1> %a) {
+; CHECK-LABEL: reduce_and_i2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT:    vmnot.m v8, v0
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %a)
+  ret i1 %1
+}
+
+define i1 @reduce_and_i4(<4 x i1> %a) {
+; CHECK-LABEL: reduce_and_i4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT:    vmnot.m v8, v0
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
+  ret i1 %1
+}
+
+define i1 @reduce_and_i8(<8 x i1> %a) {
+; CHECK-LABEL: reduce_and_i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vmnot.m v8, v0
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
+  ret i1 %1
+}
+
+define i1 @reduce_and_i16(<16 x i1> %a) {
+; CHECK-LABEL: reduce_and_i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vmnot.m v8, v0
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
+  ret i1 %1
+}
+
+define i1 @reduce_and_i32(<32 x i1> %a) {
+; CHECK-LABEL: reduce_and_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vmnot.m v8, v0
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a)
+  ret i1 %1
+}
+
+define i1 @reduce_and_i64(<64 x i1> %a) {
+; CHECK-LABEL: reduce_and_i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 64
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vmnot.m v8, v0
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %a)
+  ret i1 %1
+}
+
+define i1 @reduce_and_i128(<128 x i1> %a) {
+; CHECK-LABEL: reduce_and_i128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vmnot.m v8, v0
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> %a)
+  ret i1 %1
+}
+
+define i1 @reduce_and_i256(<256 x i1> %a) {
+; CHECK-LABEL: reduce_and_i256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vmnand.mm v8, v0, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> %a)
+  ret i1 %1
+}
+
+define i1 @reduce_and_i512(<512 x i1> %a) {
+; CHECK-LABEL: reduce_and_i512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vmand.mm v8, v8, v10
+; CHECK-NEXT:    vmand.mm v9, v0, v9
+; CHECK-NEXT:    vmnand.mm v8, v9, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> %a)
+  ret i1 %1
+}
+
+define i1 @reduce_and_i1024(<1024 x i1> %a) {
+; CHECK-LABEL: reduce_and_i1024:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vmand.mm v10, v10, v14
+; CHECK-NEXT:    vmand.mm v8, v8, v12
+; CHECK-NEXT:    vmand.mm v9, v9, v13
+; CHECK-NEXT:    vmand.mm v11, v0, v11
+; CHECK-NEXT:    vmand.mm v8, v8, v10
+; CHECK-NEXT:    vmand.mm v9, v11, v9
+; CHECK-NEXT:    vmnand.mm v8, v9, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> %a)
+  ret i1 %1
+}
+
+
+define i1 @reduce_and_i1_vscale(<vscale x 1 x i1> %a) {
+; CHECK-LABEL: reduce_and_i1_vscale:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vmnot.m v8, v0
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i1 @llvm.vector.reduce.and.nxv1i1(<vscale x 1 x i1> %a)
+  ret i1 %1
+}
+
+define i1 @reduce_and_i2_vscale(<vscale x 2 x i1> %a) {
+; CHECK-LABEL: reduce_and_i2_vscale:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vmnot.m v8, v0
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i1 @llvm.vector.reduce.and.nxv2i1(<vscale x 2 x i1> %a)
+  ret i1 %1
+}
+
+define i1 @reduce_and_i4_vscale(<vscale x 4 x i1> %a) {
+; CHECK-LABEL: reduce_and_i4_vscale:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vmnot.m v8, v0
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i1 @llvm.vector.reduce.and.nxv4i1(<vscale x 4 x i1> %a)
+  ret i1 %1
+}
+
+define i1 @reduce_and_i8_vscale(<vscale x 8 x i1> %a) {
+; CHECK-LABEL: reduce_and_i8_vscale:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vmnot.m v8, v0
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i1 @llvm.vector.reduce.and.nxv8i1(<vscale x 8 x i1> %a)
+  ret i1 %1
+}
+
+define i1 @reduce_and_i16_vscale(<vscale x 16 x i1> %a) {
+; CHECK-LABEL: reduce_and_i16_vscale:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
+; CHECK-NEXT:    vmnot.m v8, v0
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i1 @llvm.vector.reduce.and.nxv16i1(<vscale x 16 x i1> %a)
+  ret i1 %1
+}
+
+define i1 @reduce_and_i32_vscale(<vscale x 32 x i1> %a) {
+; CHECK-LABEL: reduce_and_i32_vscale:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
+; CHECK-NEXT:    vmnot.m v8, v0
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1> %a)
+  ret i1 %1
+}
+
+define i1 @reduce_and_i64_vscale(<vscale x 64 x i1> %a) {
+; CHECK-LABEL: reduce_and_i64_vscale:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmnot.m v8, v0
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1> %a)
+  ret i1 %1
+}
+
+define i1 @reduce_and_i128_vscale(<vscale x 128 x i1> %a) {
+; CHECK-LABEL: reduce_and_i128_vscale:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmand.mm v8, v0, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i1 @llvm.vector.reduce.and.nxv128i1(<vscale x 128 x i1> %a)
+  ret i1 %1
+}
+
+define i1 @reduce_and_i256_vscale(<vscale x 256 x i1> %a) {
+; CHECK-LABEL: reduce_and_i256_vscale:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmand.mm v8, v8, v10
+; CHECK-NEXT:    vmand.mm v9, v0, v9
+; CHECK-NEXT:    vmand.mm v8, v9, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i1 @llvm.vector.reduce.and.nxv256i1(<vscale x 256 x i1> %a)
+  ret i1 %1
+}
+
+define i1 @reduce_and_i512_vscale(<vscale x 512 x i1> %a) {
+; CHECK-LABEL: reduce_and_i512_vscale:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmand.mm v10, v10, v14
+; CHECK-NEXT:    vmand.mm v8, v8, v12
+; CHECK-NEXT:    vmand.mm v9, v9, v13
+; CHECK-NEXT:    vmand.mm v11, v0, v11
+; CHECK-NEXT:    vmand.mm v8, v8, v10
+; CHECK-NEXT:    vmand.mm v9, v11, v9
+; CHECK-NEXT:    vmand.mm v8, v9, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i1 @llvm.vector.reduce.and.nxv512i1(<vscale x 512 x i1> %a)
+  ret i1 %1
+}
+
+define i1 @reduce_and_i1024_vscale(<vscale x 1024 x i1> %a) {
+; CHECK-LABEL: reduce_and_i1024_vscale:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmand.mm v14, v14, v22
+; CHECK-NEXT:    vmand.mm v10, v10, v18
+; CHECK-NEXT:    vmand.mm v12, v12, v20
+; CHECK-NEXT:    vmand.mm v8, v8, v16
+; CHECK-NEXT:    vmand.mm v13, v13, v21
+; CHECK-NEXT:    vmand.mm v9, v9, v17
+; CHECK-NEXT:    vmand.mm v11, v11, v19
+; CHECK-NEXT:    vmand.mm v15, v0, v15
+; CHECK-NEXT:    vmand.mm v10, v10, v14
+; CHECK-NEXT:    vmand.mm v8, v8, v12
+; CHECK-NEXT:    vmand.mm v9, v9, v13
+; CHECK-NEXT:    vmand.mm v11, v15, v11
+; CHECK-NEXT:    vmand.mm v8, v8, v10
+; CHECK-NEXT:    vmand.mm v9, v11, v9
+; CHECK-NEXT:    vmand.mm v8, v9, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i1 @llvm.vector.reduce.and.nxv1024i1(<vscale x 1024 x i1> %a)
+  ret i1 %1
+}

>From e90875bd0b93e1bd58f1e97654105feae5c83fd7 Mon Sep 17 00:00:00 2001
From: Shao-Ce SUN <sunshaoce at outlook.com>
Date: Mon, 9 Dec 2024 10:31:38 +0800
Subject: [PATCH 2/3] fixup! Add more testcases for vreductions

---
 .../rvv/fixed-vectors-vreductions-mask.ll     | 472 +++++++++++++++
 llvm/test/CodeGen/RISCV/rvv/reduce-add.ll     | 307 ----------
 .../CodeGen/RISCV/rvv/vreductions-mask.ll     | 558 ++++++++++++++++++
 3 files changed, 1030 insertions(+), 307 deletions(-)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/reduce-add.ll

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll
index 44d4a8a1e04cda..0d31ec5f784352 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll
@@ -763,3 +763,475 @@ define zeroext i1 @vreduce_add_v64i1(<64 x i1> %v) {
   %red = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> %v)
   ret i1 %red
 }
+
+declare i1 @llvm.vector.reduce.or.v128i1(<128 x i1>)
+
+define zeroext i1 @vreduce_or_v128i1(<128 x i1> %v) {
+; CHECK-LABEL: vreduce_or_v128i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.v128i1(<128 x i1>)
+
+define zeroext i1 @vreduce_xor_v128i1(<128 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_v128i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.v128i1(<128 x i1>)
+
+define zeroext i1 @vreduce_and_v128i1(<128 x i1> %v) {
+; CHECK-LABEL: vreduce_and_v128i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vmnot.m v8, v0
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.umax.v128i1(<128 x i1>)
+
+define zeroext i1 @vreduce_umax_v128i1(<128 x i1> %v) {
+; CHECK-LABEL: vreduce_umax_v128i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.umax.v128i1(<128 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.smax.v128i1(<128 x i1>)
+
+define zeroext i1 @vreduce_smax_v128i1(<128 x i1> %v) {
+; CHECK-LABEL: vreduce_smax_v128i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vmnot.m v8, v0
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.smax.v128i1(<128 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.umin.v128i1(<128 x i1>)
+
+define zeroext i1 @vreduce_umin_v128i1(<128 x i1> %v) {
+; CHECK-LABEL: vreduce_umin_v128i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vmnot.m v8, v0
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.umin.v128i1(<128 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.smin.v128i1(<128 x i1>)
+
+define zeroext i1 @vreduce_smin_v128i1(<128 x i1> %v) {
+; CHECK-LABEL: vreduce_smin_v128i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.smin.v128i1(<128 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.v256i1(<256 x i1>)
+
+define zeroext i1 @vreduce_or_v256i1(<256 x i1> %v) {
+; CHECK-LABEL: vreduce_or_v256i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vmor.mm v8, v0, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.v256i1(<256 x i1>)
+
+define zeroext i1 @vreduce_xor_v256i1(<256 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_v256i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vmxor.mm v8, v0, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.xor.v256i1(<256 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.v256i1(<256 x i1>)
+
+define zeroext i1 @vreduce_and_v256i1(<256 x i1> %v) {
+; CHECK-LABEL: vreduce_and_v256i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vmnand.mm v8, v0, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.umax.v256i1(<256 x i1>)
+
+define zeroext i1 @vreduce_umax_v256i1(<256 x i1> %v) {
+; CHECK-LABEL: vreduce_umax_v256i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vmor.mm v8, v0, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.umax.v256i1(<256 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.smax.v256i1(<256 x i1>)
+
+define zeroext i1 @vreduce_smax_v256i1(<256 x i1> %v) {
+; CHECK-LABEL: vreduce_smax_v256i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vmnand.mm v8, v0, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.smax.v256i1(<256 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.umin.v256i1(<256 x i1>)
+
+define zeroext i1 @vreduce_umin_v256i1(<256 x i1> %v) {
+; CHECK-LABEL: vreduce_umin_v256i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vmnand.mm v8, v0, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.umin.v256i1(<256 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.smin.v256i1(<256 x i1>)
+
+define zeroext i1 @vreduce_smin_v256i1(<256 x i1> %v) {
+; CHECK-LABEL: vreduce_smin_v256i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vmor.mm v8, v0, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.smin.v256i1(<256 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.v512i1(<512 x i1>)
+
+define zeroext i1 @vreduce_or_v512i1(<512 x i1> %v) {
+; CHECK-LABEL: vreduce_or_v512i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vmor.mm v8, v8, v10
+; CHECK-NEXT:    vmor.mm v9, v0, v9
+; CHECK-NEXT:    vmor.mm v8, v9, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.or.v512i1(<512 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.v512i1(<512 x i1>)
+
+define zeroext i1 @vreduce_xor_v512i1(<512 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_v512i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vmxor.mm v8, v8, v10
+; CHECK-NEXT:    vmxor.mm v9, v0, v9
+; CHECK-NEXT:    vmxor.mm v8, v9, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.xor.v512i1(<512 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.v512i1(<512 x i1>)
+
+define zeroext i1 @vreduce_and_v512i1(<512 x i1> %v) {
+; CHECK-LABEL: vreduce_and_v512i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vmand.mm v8, v8, v10
+; CHECK-NEXT:    vmand.mm v9, v0, v9
+; CHECK-NEXT:    vmnand.mm v8, v9, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.umax.v512i1(<512 x i1>)
+
+define zeroext i1 @vreduce_umax_v512i1(<512 x i1> %v) {
+; CHECK-LABEL: vreduce_umax_v512i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vmor.mm v8, v8, v10
+; CHECK-NEXT:    vmor.mm v9, v0, v9
+; CHECK-NEXT:    vmor.mm v8, v9, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.umax.v512i1(<512 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.smax.v512i1(<512 x i1>)
+
+define zeroext i1 @vreduce_smax_v512i1(<512 x i1> %v) {
+; CHECK-LABEL: vreduce_smax_v512i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vmand.mm v8, v8, v10
+; CHECK-NEXT:    vmand.mm v9, v0, v9
+; CHECK-NEXT:    vmnand.mm v8, v9, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.smax.v512i1(<512 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.umin.v512i1(<512 x i1>)
+
+define zeroext i1 @vreduce_umin_v512i1(<512 x i1> %v) {
+; CHECK-LABEL: vreduce_umin_v512i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vmand.mm v8, v8, v10
+; CHECK-NEXT:    vmand.mm v9, v0, v9
+; CHECK-NEXT:    vmnand.mm v8, v9, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.umin.v512i1(<512 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.smin.v512i1(<512 x i1>)
+
+define zeroext i1 @vreduce_smin_v512i1(<512 x i1> %v) {
+; CHECK-LABEL: vreduce_smin_v512i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vmor.mm v8, v8, v10
+; CHECK-NEXT:    vmor.mm v9, v0, v9
+; CHECK-NEXT:    vmor.mm v8, v9, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.smin.v512i1(<512 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.v1024i1(<1024 x i1>)
+
+define zeroext i1 @vreduce_or_v1024i1(<1024 x i1> %v) {
+; CHECK-LABEL: vreduce_or_v1024i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vmor.mm v10, v10, v14
+; CHECK-NEXT:    vmor.mm v8, v8, v12
+; CHECK-NEXT:    vmor.mm v9, v9, v13
+; CHECK-NEXT:    vmor.mm v11, v0, v11
+; CHECK-NEXT:    vmor.mm v8, v8, v10
+; CHECK-NEXT:    vmor.mm v9, v11, v9
+; CHECK-NEXT:    vmor.mm v8, v9, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.or.v1024i1(<1024 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.v1024i1(<1024 x i1>)
+
+define zeroext i1 @vreduce_xor_v1024i1(<1024 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_v1024i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vmxor.mm v10, v10, v14
+; CHECK-NEXT:    vmxor.mm v8, v8, v12
+; CHECK-NEXT:    vmxor.mm v9, v9, v13
+; CHECK-NEXT:    vmxor.mm v11, v0, v11
+; CHECK-NEXT:    vmxor.mm v8, v8, v10
+; CHECK-NEXT:    vmxor.mm v9, v11, v9
+; CHECK-NEXT:    vmxor.mm v8, v9, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.xor.v1024i1(<1024 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1>)
+
+define zeroext i1 @vreduce_and_v1024i1(<1024 x i1> %v) {
+; CHECK-LABEL: vreduce_and_v1024i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vmand.mm v10, v10, v14
+; CHECK-NEXT:    vmand.mm v8, v8, v12
+; CHECK-NEXT:    vmand.mm v9, v9, v13
+; CHECK-NEXT:    vmand.mm v11, v0, v11
+; CHECK-NEXT:    vmand.mm v8, v8, v10
+; CHECK-NEXT:    vmand.mm v9, v11, v9
+; CHECK-NEXT:    vmnand.mm v8, v9, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.umax.v1024i1(<1024 x i1>)
+
+define zeroext i1 @vreduce_umax_v1024i1(<1024 x i1> %v) {
+; CHECK-LABEL: vreduce_umax_v1024i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vmor.mm v10, v10, v14
+; CHECK-NEXT:    vmor.mm v8, v8, v12
+; CHECK-NEXT:    vmor.mm v9, v9, v13
+; CHECK-NEXT:    vmor.mm v11, v0, v11
+; CHECK-NEXT:    vmor.mm v8, v8, v10
+; CHECK-NEXT:    vmor.mm v9, v11, v9
+; CHECK-NEXT:    vmor.mm v8, v9, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.umax.v1024i1(<1024 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.smax.v1024i1(<1024 x i1>)
+
+define zeroext i1 @vreduce_smax_v1024i1(<1024 x i1> %v) {
+; CHECK-LABEL: vreduce_smax_v1024i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vmand.mm v10, v10, v14
+; CHECK-NEXT:    vmand.mm v8, v8, v12
+; CHECK-NEXT:    vmand.mm v9, v9, v13
+; CHECK-NEXT:    vmand.mm v11, v0, v11
+; CHECK-NEXT:    vmand.mm v8, v8, v10
+; CHECK-NEXT:    vmand.mm v9, v11, v9
+; CHECK-NEXT:    vmnand.mm v8, v9, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.smax.v1024i1(<1024 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.umin.v1024i1(<1024 x i1>)
+
+define zeroext i1 @vreduce_umin_v1024i1(<1024 x i1> %v) {
+; CHECK-LABEL: vreduce_umin_v1024i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vmand.mm v10, v10, v14
+; CHECK-NEXT:    vmand.mm v8, v8, v12
+; CHECK-NEXT:    vmand.mm v9, v9, v13
+; CHECK-NEXT:    vmand.mm v11, v0, v11
+; CHECK-NEXT:    vmand.mm v8, v8, v10
+; CHECK-NEXT:    vmand.mm v9, v11, v9
+; CHECK-NEXT:    vmnand.mm v8, v9, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.umin.v1024i1(<1024 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.smin.v1024i1(<1024 x i1>)
+
+define zeroext i1 @vreduce_smin_v1024i1(<1024 x i1> %v) {
+; CHECK-LABEL: vreduce_smin_v1024i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vmor.mm v10, v10, v14
+; CHECK-NEXT:    vmor.mm v8, v8, v12
+; CHECK-NEXT:    vmor.mm v9, v9, v13
+; CHECK-NEXT:    vmor.mm v11, v0, v11
+; CHECK-NEXT:    vmor.mm v8, v8, v10
+; CHECK-NEXT:    vmor.mm v9, v11, v9
+; CHECK-NEXT:    vmor.mm v8, v9, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.smin.v1024i1(<1024 x i1> %v)
+  ret i1 %red
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/reduce-add.ll b/llvm/test/CodeGen/RISCV/rvv/reduce-add.ll
deleted file mode 100644
index 9fef8a453b155e..00000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/reduce-add.ll
+++ /dev/null
@@ -1,307 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s
-
-define i1 @reduce_and_i1(<1 x i1> %a) {
-; CHECK-LABEL: reduce_and_i1:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vfirst.m a0, v0
-; CHECK-NEXT:    seqz a0, a0
-; CHECK-NEXT:    ret
-  %1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %a)
-  ret i1 %1
-}
-
-define i1 @reduce_and_i2(<2 x i1> %a) {
-; CHECK-LABEL: reduce_and_i2:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT:    vmnot.m v8, v0
-; CHECK-NEXT:    vcpop.m a0, v8
-; CHECK-NEXT:    seqz a0, a0
-; CHECK-NEXT:    ret
-  %1 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %a)
-  ret i1 %1
-}
-
-define i1 @reduce_and_i4(<4 x i1> %a) {
-; CHECK-LABEL: reduce_and_i4:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vmnot.m v8, v0
-; CHECK-NEXT:    vcpop.m a0, v8
-; CHECK-NEXT:    seqz a0, a0
-; CHECK-NEXT:    ret
-  %1 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
-  ret i1 %1
-}
-
-define i1 @reduce_and_i8(<8 x i1> %a) {
-; CHECK-LABEL: reduce_and_i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vmnot.m v8, v0
-; CHECK-NEXT:    vcpop.m a0, v8
-; CHECK-NEXT:    seqz a0, a0
-; CHECK-NEXT:    ret
-  %1 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
-  ret i1 %1
-}
-
-define i1 @reduce_and_i16(<16 x i1> %a) {
-; CHECK-LABEL: reduce_and_i16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vmnot.m v8, v0
-; CHECK-NEXT:    vcpop.m a0, v8
-; CHECK-NEXT:    seqz a0, a0
-; CHECK-NEXT:    ret
-  %1 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
-  ret i1 %1
-}
-
-define i1 @reduce_and_i32(<32 x i1> %a) {
-; CHECK-LABEL: reduce_and_i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a0, 32
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vmnot.m v8, v0
-; CHECK-NEXT:    vcpop.m a0, v8
-; CHECK-NEXT:    seqz a0, a0
-; CHECK-NEXT:    ret
-  %1 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a)
-  ret i1 %1
-}
-
-define i1 @reduce_and_i64(<64 x i1> %a) {
-; CHECK-LABEL: reduce_and_i64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a0, 64
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vmnot.m v8, v0
-; CHECK-NEXT:    vcpop.m a0, v8
-; CHECK-NEXT:    seqz a0, a0
-; CHECK-NEXT:    ret
-  %1 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %a)
-  ret i1 %1
-}
-
-define i1 @reduce_and_i128(<128 x i1> %a) {
-; CHECK-LABEL: reduce_and_i128:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a0, 128
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
-; CHECK-NEXT:    vmnot.m v8, v0
-; CHECK-NEXT:    vcpop.m a0, v8
-; CHECK-NEXT:    seqz a0, a0
-; CHECK-NEXT:    ret
-  %1 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> %a)
-  ret i1 %1
-}
-
-define i1 @reduce_and_i256(<256 x i1> %a) {
-; CHECK-LABEL: reduce_and_i256:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a0, 128
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
-; CHECK-NEXT:    vmnand.mm v8, v0, v8
-; CHECK-NEXT:    vcpop.m a0, v8
-; CHECK-NEXT:    seqz a0, a0
-; CHECK-NEXT:    ret
-  %1 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> %a)
-  ret i1 %1
-}
-
-define i1 @reduce_and_i512(<512 x i1> %a) {
-; CHECK-LABEL: reduce_and_i512:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a0, 128
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
-; CHECK-NEXT:    vmand.mm v8, v8, v10
-; CHECK-NEXT:    vmand.mm v9, v0, v9
-; CHECK-NEXT:    vmnand.mm v8, v9, v8
-; CHECK-NEXT:    vcpop.m a0, v8
-; CHECK-NEXT:    seqz a0, a0
-; CHECK-NEXT:    ret
-  %1 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> %a)
-  ret i1 %1
-}
-
-define i1 @reduce_and_i1024(<1024 x i1> %a) {
-; CHECK-LABEL: reduce_and_i1024:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a0, 128
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
-; CHECK-NEXT:    vmand.mm v10, v10, v14
-; CHECK-NEXT:    vmand.mm v8, v8, v12
-; CHECK-NEXT:    vmand.mm v9, v9, v13
-; CHECK-NEXT:    vmand.mm v11, v0, v11
-; CHECK-NEXT:    vmand.mm v8, v8, v10
-; CHECK-NEXT:    vmand.mm v9, v11, v9
-; CHECK-NEXT:    vmnand.mm v8, v9, v8
-; CHECK-NEXT:    vcpop.m a0, v8
-; CHECK-NEXT:    seqz a0, a0
-; CHECK-NEXT:    ret
-  %1 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> %a)
-  ret i1 %1
-}
-
-
-define i1 @reduce_and_i1_vscale(<vscale x 1 x i1> %a) {
-; CHECK-LABEL: reduce_and_i1_vscale:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vmnot.m v8, v0
-; CHECK-NEXT:    vcpop.m a0, v8
-; CHECK-NEXT:    seqz a0, a0
-; CHECK-NEXT:    ret
-  %1 = call i1 @llvm.vector.reduce.and.nxv1i1(<vscale x 1 x i1> %a)
-  ret i1 %1
-}
-
-define i1 @reduce_and_i2_vscale(<vscale x 2 x i1> %a) {
-; CHECK-LABEL: reduce_and_i2_vscale:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vmnot.m v8, v0
-; CHECK-NEXT:    vcpop.m a0, v8
-; CHECK-NEXT:    seqz a0, a0
-; CHECK-NEXT:    ret
-  %1 = call i1 @llvm.vector.reduce.and.nxv2i1(<vscale x 2 x i1> %a)
-  ret i1 %1
-}
-
-define i1 @reduce_and_i4_vscale(<vscale x 4 x i1> %a) {
-; CHECK-LABEL: reduce_and_i4_vscale:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vmnot.m v8, v0
-; CHECK-NEXT:    vcpop.m a0, v8
-; CHECK-NEXT:    seqz a0, a0
-; CHECK-NEXT:    ret
-  %1 = call i1 @llvm.vector.reduce.and.nxv4i1(<vscale x 4 x i1> %a)
-  ret i1 %1
-}
-
-define i1 @reduce_and_i8_vscale(<vscale x 8 x i1> %a) {
-; CHECK-LABEL: reduce_and_i8_vscale:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vmnot.m v8, v0
-; CHECK-NEXT:    vcpop.m a0, v8
-; CHECK-NEXT:    seqz a0, a0
-; CHECK-NEXT:    ret
-  %1 = call i1 @llvm.vector.reduce.and.nxv8i1(<vscale x 8 x i1> %a)
-  ret i1 %1
-}
-
-define i1 @reduce_and_i16_vscale(<vscale x 16 x i1> %a) {
-; CHECK-LABEL: reduce_and_i16_vscale:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vmnot.m v8, v0
-; CHECK-NEXT:    vcpop.m a0, v8
-; CHECK-NEXT:    seqz a0, a0
-; CHECK-NEXT:    ret
-  %1 = call i1 @llvm.vector.reduce.and.nxv16i1(<vscale x 16 x i1> %a)
-  ret i1 %1
-}
-
-define i1 @reduce_and_i32_vscale(<vscale x 32 x i1> %a) {
-; CHECK-LABEL: reduce_and_i32_vscale:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vmnot.m v8, v0
-; CHECK-NEXT:    vcpop.m a0, v8
-; CHECK-NEXT:    seqz a0, a0
-; CHECK-NEXT:    ret
-  %1 = call i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1> %a)
-  ret i1 %1
-}
-
-define i1 @reduce_and_i64_vscale(<vscale x 64 x i1> %a) {
-; CHECK-LABEL: reduce_and_i64_vscale:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
-; CHECK-NEXT:    vmnot.m v8, v0
-; CHECK-NEXT:    vcpop.m a0, v8
-; CHECK-NEXT:    seqz a0, a0
-; CHECK-NEXT:    ret
-  %1 = call i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1> %a)
-  ret i1 %1
-}
-
-define i1 @reduce_and_i128_vscale(<vscale x 128 x i1> %a) {
-; CHECK-LABEL: reduce_and_i128_vscale:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
-; CHECK-NEXT:    vmand.mm v8, v0, v8
-; CHECK-NEXT:    vmnot.m v8, v8
-; CHECK-NEXT:    vcpop.m a0, v8
-; CHECK-NEXT:    seqz a0, a0
-; CHECK-NEXT:    ret
-  %1 = call i1 @llvm.vector.reduce.and.nxv128i1(<vscale x 128 x i1> %a)
-  ret i1 %1
-}
-
-define i1 @reduce_and_i256_vscale(<vscale x 256 x i1> %a) {
-; CHECK-LABEL: reduce_and_i256_vscale:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
-; CHECK-NEXT:    vmand.mm v8, v8, v10
-; CHECK-NEXT:    vmand.mm v9, v0, v9
-; CHECK-NEXT:    vmand.mm v8, v9, v8
-; CHECK-NEXT:    vmnot.m v8, v8
-; CHECK-NEXT:    vcpop.m a0, v8
-; CHECK-NEXT:    seqz a0, a0
-; CHECK-NEXT:    ret
-  %1 = call i1 @llvm.vector.reduce.and.nxv256i1(<vscale x 256 x i1> %a)
-  ret i1 %1
-}
-
-define i1 @reduce_and_i512_vscale(<vscale x 512 x i1> %a) {
-; CHECK-LABEL: reduce_and_i512_vscale:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
-; CHECK-NEXT:    vmand.mm v10, v10, v14
-; CHECK-NEXT:    vmand.mm v8, v8, v12
-; CHECK-NEXT:    vmand.mm v9, v9, v13
-; CHECK-NEXT:    vmand.mm v11, v0, v11
-; CHECK-NEXT:    vmand.mm v8, v8, v10
-; CHECK-NEXT:    vmand.mm v9, v11, v9
-; CHECK-NEXT:    vmand.mm v8, v9, v8
-; CHECK-NEXT:    vmnot.m v8, v8
-; CHECK-NEXT:    vcpop.m a0, v8
-; CHECK-NEXT:    seqz a0, a0
-; CHECK-NEXT:    ret
-  %1 = call i1 @llvm.vector.reduce.and.nxv512i1(<vscale x 512 x i1> %a)
-  ret i1 %1
-}
-
-define i1 @reduce_and_i1024_vscale(<vscale x 1024 x i1> %a) {
-; CHECK-LABEL: reduce_and_i1024_vscale:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
-; CHECK-NEXT:    vmand.mm v14, v14, v22
-; CHECK-NEXT:    vmand.mm v10, v10, v18
-; CHECK-NEXT:    vmand.mm v12, v12, v20
-; CHECK-NEXT:    vmand.mm v8, v8, v16
-; CHECK-NEXT:    vmand.mm v13, v13, v21
-; CHECK-NEXT:    vmand.mm v9, v9, v17
-; CHECK-NEXT:    vmand.mm v11, v11, v19
-; CHECK-NEXT:    vmand.mm v15, v0, v15
-; CHECK-NEXT:    vmand.mm v10, v10, v14
-; CHECK-NEXT:    vmand.mm v8, v8, v12
-; CHECK-NEXT:    vmand.mm v9, v9, v13
-; CHECK-NEXT:    vmand.mm v11, v15, v11
-; CHECK-NEXT:    vmand.mm v8, v8, v10
-; CHECK-NEXT:    vmand.mm v9, v11, v9
-; CHECK-NEXT:    vmand.mm v8, v9, v8
-; CHECK-NEXT:    vmnot.m v8, v8
-; CHECK-NEXT:    vcpop.m a0, v8
-; CHECK-NEXT:    seqz a0, a0
-; CHECK-NEXT:    ret
-  %1 = call i1 @llvm.vector.reduce.and.nxv1024i1(<vscale x 1024 x i1> %a)
-  ret i1 %1
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll
index fcd7482dffe2ec..d99fd036b4fc92 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll
@@ -750,3 +750,561 @@ define zeroext i1 @vreduce_add_nxv64i1(<vscale x 64 x i1> %v) {
   %red = call i1 @llvm.vector.reduce.add.nxv64i1(<vscale x 64 x i1> %v)
   ret i1 %red
 }
+
+declare i1 @llvm.vector.reduce.or.nxv128i1(<vscale x 128 x i1>)
+
+define zeroext i1 @vreduce_or_nxv128i1(<vscale x 128 x i1> %v) {
+; CHECK-LABEL: vreduce_or_nxv128i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmor.mm v8, v0, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.or.nxv128i1(<vscale x 128 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.nxv128i1(<vscale x 128 x i1>)
+
+define zeroext i1 @vreduce_xor_nxv128i1(<vscale x 128 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_nxv128i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmxor.mm v8, v0, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.xor.nxv128i1(<vscale x 128 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.nxv128i1(<vscale x 128 x i1>)
+
+define zeroext i1 @vreduce_and_nxv128i1(<vscale x 128 x i1> %v) {
+; CHECK-LABEL: vreduce_and_nxv128i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmand.mm v8, v0, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.and.nxv128i1(<vscale x 128 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.umax.nxv128i1(<vscale x 128 x i1>)
+
+define zeroext i1 @vreduce_umax_nxv128i1(<vscale x 128 x i1> %v) {
+; CHECK-LABEL: vreduce_umax_nxv128i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmor.mm v8, v0, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.umax.nxv128i1(<vscale x 128 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.smax.nxv128i1(<vscale x 128 x i1>)
+
+define zeroext i1 @vreduce_smax_nxv128i1(<vscale x 128 x i1> %v) {
+; CHECK-LABEL: vreduce_smax_nxv128i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmand.mm v8, v0, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.smax.nxv128i1(<vscale x 128 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.umin.nxv128i1(<vscale x 128 x i1>)
+
+define zeroext i1 @vreduce_umin_nxv128i1(<vscale x 128 x i1> %v) {
+; CHECK-LABEL: vreduce_umin_nxv128i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmand.mm v8, v0, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.umin.nxv128i1(<vscale x 128 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.smin.nxv128i1(<vscale x 128 x i1>)
+
+define zeroext i1 @vreduce_smin_nxv128i1(<vscale x 128 x i1> %v) {
+; CHECK-LABEL: vreduce_smin_nxv128i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmor.mm v8, v0, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.smin.nxv128i1(<vscale x 128 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.nxv256i1(<vscale x 256 x i1>)
+
+define zeroext i1 @vreduce_or_nxv256i1(<vscale x 256 x i1> %v) {
+; CHECK-LABEL: vreduce_or_nxv256i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmor.mm v8, v8, v10
+; CHECK-NEXT:    vmor.mm v9, v0, v9
+; CHECK-NEXT:    vmor.mm v8, v9, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.or.nxv256i1(<vscale x 256 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.nxv256i1(<vscale x 256 x i1>)
+
+define zeroext i1 @vreduce_xor_nxv256i1(<vscale x 256 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_nxv256i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmxor.mm v8, v8, v10
+; CHECK-NEXT:    vmxor.mm v9, v0, v9
+; CHECK-NEXT:    vmxor.mm v8, v9, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.xor.nxv256i1(<vscale x 256 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.nxv256i1(<vscale x 256 x i1>)
+
+define zeroext i1 @vreduce_and_nxv256i1(<vscale x 256 x i1> %v) {
+; CHECK-LABEL: vreduce_and_nxv256i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmand.mm v8, v8, v10
+; CHECK-NEXT:    vmand.mm v9, v0, v9
+; CHECK-NEXT:    vmand.mm v8, v9, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.and.nxv256i1(<vscale x 256 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.umax.nxv256i1(<vscale x 256 x i1>)
+
+define zeroext i1 @vreduce_umax_nxv256i1(<vscale x 256 x i1> %v) {
+; CHECK-LABEL: vreduce_umax_nxv256i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmor.mm v8, v8, v10
+; CHECK-NEXT:    vmor.mm v9, v0, v9
+; CHECK-NEXT:    vmor.mm v8, v9, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.umax.nxv256i1(<vscale x 256 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.smax.nxv256i1(<vscale x 256 x i1>)
+
+define zeroext i1 @vreduce_smax_nxv256i1(<vscale x 256 x i1> %v) {
+; CHECK-LABEL: vreduce_smax_nxv256i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmand.mm v8, v8, v10
+; CHECK-NEXT:    vmand.mm v9, v0, v9
+; CHECK-NEXT:    vmand.mm v8, v9, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.smax.nxv256i1(<vscale x 256 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.umin.nxv256i1(<vscale x 256 x i1>)
+
+define zeroext i1 @vreduce_umin_nxv256i1(<vscale x 256 x i1> %v) {
+; CHECK-LABEL: vreduce_umin_nxv256i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmand.mm v8, v8, v10
+; CHECK-NEXT:    vmand.mm v9, v0, v9
+; CHECK-NEXT:    vmand.mm v8, v9, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.umin.nxv256i1(<vscale x 256 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.smin.nxv256i1(<vscale x 256 x i1>)
+
+define zeroext i1 @vreduce_smin_nxv256i1(<vscale x 256 x i1> %v) {
+; CHECK-LABEL: vreduce_smin_nxv256i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmor.mm v8, v8, v10
+; CHECK-NEXT:    vmor.mm v9, v0, v9
+; CHECK-NEXT:    vmor.mm v8, v9, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.smin.nxv256i1(<vscale x 256 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.nxv512i1(<vscale x 512 x i1>)
+
+define zeroext i1 @vreduce_or_nxv512i1(<vscale x 512 x i1> %v) {
+; CHECK-LABEL: vreduce_or_nxv512i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmor.mm v10, v10, v14
+; CHECK-NEXT:    vmor.mm v8, v8, v12
+; CHECK-NEXT:    vmor.mm v9, v9, v13
+; CHECK-NEXT:    vmor.mm v11, v0, v11
+; CHECK-NEXT:    vmor.mm v8, v8, v10
+; CHECK-NEXT:    vmor.mm v9, v11, v9
+; CHECK-NEXT:    vmor.mm v8, v9, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.or.nxv512i1(<vscale x 512 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.nxv512i1(<vscale x 512 x i1>)
+
+define zeroext i1 @vreduce_xor_nxv512i1(<vscale x 512 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_nxv512i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmxor.mm v10, v10, v14
+; CHECK-NEXT:    vmxor.mm v8, v8, v12
+; CHECK-NEXT:    vmxor.mm v9, v9, v13
+; CHECK-NEXT:    vmxor.mm v11, v0, v11
+; CHECK-NEXT:    vmxor.mm v8, v8, v10
+; CHECK-NEXT:    vmxor.mm v9, v11, v9
+; CHECK-NEXT:    vmxor.mm v8, v9, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.xor.nxv512i1(<vscale x 512 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.nxv512i1(<vscale x 512 x i1>)
+
+define zeroext i1 @vreduce_and_nxv512i1(<vscale x 512 x i1> %v) {
+; CHECK-LABEL: vreduce_and_nxv512i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmand.mm v10, v10, v14
+; CHECK-NEXT:    vmand.mm v8, v8, v12
+; CHECK-NEXT:    vmand.mm v9, v9, v13
+; CHECK-NEXT:    vmand.mm v11, v0, v11
+; CHECK-NEXT:    vmand.mm v8, v8, v10
+; CHECK-NEXT:    vmand.mm v9, v11, v9
+; CHECK-NEXT:    vmand.mm v8, v9, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.and.nxv512i1(<vscale x 512 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.umax.nxv512i1(<vscale x 512 x i1>)
+
+define zeroext i1 @vreduce_umax_nxv512i1(<vscale x 512 x i1> %v) {
+; CHECK-LABEL: vreduce_umax_nxv512i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmor.mm v10, v10, v14
+; CHECK-NEXT:    vmor.mm v8, v8, v12
+; CHECK-NEXT:    vmor.mm v9, v9, v13
+; CHECK-NEXT:    vmor.mm v11, v0, v11
+; CHECK-NEXT:    vmor.mm v8, v8, v10
+; CHECK-NEXT:    vmor.mm v9, v11, v9
+; CHECK-NEXT:    vmor.mm v8, v9, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.umax.nxv512i1(<vscale x 512 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.smax.nxv512i1(<vscale x 512 x i1>)
+
+define zeroext i1 @vreduce_smax_nxv512i1(<vscale x 512 x i1> %v) {
+; CHECK-LABEL: vreduce_smax_nxv512i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmand.mm v10, v10, v14
+; CHECK-NEXT:    vmand.mm v8, v8, v12
+; CHECK-NEXT:    vmand.mm v9, v9, v13
+; CHECK-NEXT:    vmand.mm v11, v0, v11
+; CHECK-NEXT:    vmand.mm v8, v8, v10
+; CHECK-NEXT:    vmand.mm v9, v11, v9
+; CHECK-NEXT:    vmand.mm v8, v9, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.smax.nxv512i1(<vscale x 512 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.umin.nxv512i1(<vscale x 512 x i1>)
+
+define zeroext i1 @vreduce_umin_nxv512i1(<vscale x 512 x i1> %v) {
+; CHECK-LABEL: vreduce_umin_nxv512i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmand.mm v10, v10, v14
+; CHECK-NEXT:    vmand.mm v8, v8, v12
+; CHECK-NEXT:    vmand.mm v9, v9, v13
+; CHECK-NEXT:    vmand.mm v11, v0, v11
+; CHECK-NEXT:    vmand.mm v8, v8, v10
+; CHECK-NEXT:    vmand.mm v9, v11, v9
+; CHECK-NEXT:    vmand.mm v8, v9, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.umin.nxv512i1(<vscale x 512 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.smin.nxv512i1(<vscale x 512 x i1>)
+
+define zeroext i1 @vreduce_smin_nxv512i1(<vscale x 512 x i1> %v) { ; i1 smin reduction of <vscale x 512 x i1>: the assertions expect v0 and v8-v14 combined by 7 vmor.mm, then vcpop.m, snez (no vmnot.m step)
+; CHECK-LABEL: vreduce_smin_nxv512i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmor.mm v10, v10, v14
+; CHECK-NEXT:    vmor.mm v8, v8, v12
+; CHECK-NEXT:    vmor.mm v9, v9, v13
+; CHECK-NEXT:    vmor.mm v11, v0, v11
+; CHECK-NEXT:    vmor.mm v8, v8, v10
+; CHECK-NEXT:    vmor.mm v9, v11, v9
+; CHECK-NEXT:    vmor.mm v8, v9, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.smin.nxv512i1(<vscale x 512 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.nxv1024i1(<vscale x 1024 x i1>)
+
+define zeroext i1 @vreduce_or_nxv1024i1(<vscale x 1024 x i1> %v) { ; i1 or reduction of <vscale x 1024 x i1>: the assertions expect v0 and v8-v22 combined by 15 vmor.mm, then vcpop.m, snez
+; CHECK-LABEL: vreduce_or_nxv1024i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmor.mm v14, v14, v22
+; CHECK-NEXT:    vmor.mm v10, v10, v18
+; CHECK-NEXT:    vmor.mm v12, v12, v20
+; CHECK-NEXT:    vmor.mm v8, v8, v16
+; CHECK-NEXT:    vmor.mm v13, v13, v21
+; CHECK-NEXT:    vmor.mm v9, v9, v17
+; CHECK-NEXT:    vmor.mm v11, v11, v19
+; CHECK-NEXT:    vmor.mm v15, v0, v15
+; CHECK-NEXT:    vmor.mm v10, v10, v14
+; CHECK-NEXT:    vmor.mm v8, v8, v12
+; CHECK-NEXT:    vmor.mm v9, v9, v13
+; CHECK-NEXT:    vmor.mm v11, v15, v11
+; CHECK-NEXT:    vmor.mm v8, v8, v10
+; CHECK-NEXT:    vmor.mm v9, v11, v9
+; CHECK-NEXT:    vmor.mm v8, v9, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.or.nxv1024i1(<vscale x 1024 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.nxv1024i1(<vscale x 1024 x i1>)
+
+define zeroext i1 @vreduce_xor_nxv1024i1(<vscale x 1024 x i1> %v) { ; i1 xor reduction of <vscale x 1024 x i1>: the assertions expect v0 and v8-v22 combined by 15 vmxor.mm, then vcpop.m and "andi a0, a0, 1" to keep the low bit of the count
+; CHECK-LABEL: vreduce_xor_nxv1024i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmxor.mm v14, v14, v22
+; CHECK-NEXT:    vmxor.mm v10, v10, v18
+; CHECK-NEXT:    vmxor.mm v12, v12, v20
+; CHECK-NEXT:    vmxor.mm v8, v8, v16
+; CHECK-NEXT:    vmxor.mm v13, v13, v21
+; CHECK-NEXT:    vmxor.mm v9, v9, v17
+; CHECK-NEXT:    vmxor.mm v11, v11, v19
+; CHECK-NEXT:    vmxor.mm v15, v0, v15
+; CHECK-NEXT:    vmxor.mm v10, v10, v14
+; CHECK-NEXT:    vmxor.mm v8, v8, v12
+; CHECK-NEXT:    vmxor.mm v9, v9, v13
+; CHECK-NEXT:    vmxor.mm v11, v15, v11
+; CHECK-NEXT:    vmxor.mm v8, v8, v10
+; CHECK-NEXT:    vmxor.mm v9, v11, v9
+; CHECK-NEXT:    vmxor.mm v8, v9, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.xor.nxv1024i1(<vscale x 1024 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.nxv1024i1(<vscale x 1024 x i1>)
+
+define zeroext i1 @vreduce_and_nxv1024i1(<vscale x 1024 x i1> %v) { ; i1 and reduction of <vscale x 1024 x i1>: the assertions expect v0 and v8-v22 combined by 15 vmand.mm, then vmnot.m, vcpop.m, seqz
+; CHECK-LABEL: vreduce_and_nxv1024i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmand.mm v14, v14, v22
+; CHECK-NEXT:    vmand.mm v10, v10, v18
+; CHECK-NEXT:    vmand.mm v12, v12, v20
+; CHECK-NEXT:    vmand.mm v8, v8, v16
+; CHECK-NEXT:    vmand.mm v13, v13, v21
+; CHECK-NEXT:    vmand.mm v9, v9, v17
+; CHECK-NEXT:    vmand.mm v11, v11, v19
+; CHECK-NEXT:    vmand.mm v15, v0, v15
+; CHECK-NEXT:    vmand.mm v10, v10, v14
+; CHECK-NEXT:    vmand.mm v8, v8, v12
+; CHECK-NEXT:    vmand.mm v9, v9, v13
+; CHECK-NEXT:    vmand.mm v11, v15, v11
+; CHECK-NEXT:    vmand.mm v8, v8, v10
+; CHECK-NEXT:    vmand.mm v9, v11, v9
+; CHECK-NEXT:    vmand.mm v8, v9, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.and.nxv1024i1(<vscale x 1024 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.umax.nxv1024i1(<vscale x 1024 x i1>)
+
+define zeroext i1 @vreduce_umax_nxv1024i1(<vscale x 1024 x i1> %v) { ; i1 umax reduction of <vscale x 1024 x i1>: the assertions expect v0 and v8-v22 combined by 15 vmor.mm, then vcpop.m, snez
+; CHECK-LABEL: vreduce_umax_nxv1024i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmor.mm v14, v14, v22
+; CHECK-NEXT:    vmor.mm v10, v10, v18
+; CHECK-NEXT:    vmor.mm v12, v12, v20
+; CHECK-NEXT:    vmor.mm v8, v8, v16
+; CHECK-NEXT:    vmor.mm v13, v13, v21
+; CHECK-NEXT:    vmor.mm v9, v9, v17
+; CHECK-NEXT:    vmor.mm v11, v11, v19
+; CHECK-NEXT:    vmor.mm v15, v0, v15
+; CHECK-NEXT:    vmor.mm v10, v10, v14
+; CHECK-NEXT:    vmor.mm v8, v8, v12
+; CHECK-NEXT:    vmor.mm v9, v9, v13
+; CHECK-NEXT:    vmor.mm v11, v15, v11
+; CHECK-NEXT:    vmor.mm v8, v8, v10
+; CHECK-NEXT:    vmor.mm v9, v11, v9
+; CHECK-NEXT:    vmor.mm v8, v9, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.umax.nxv1024i1(<vscale x 1024 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.smax.nxv1024i1(<vscale x 1024 x i1>)
+
+define zeroext i1 @vreduce_smax_nxv1024i1(<vscale x 1024 x i1> %v) { ; i1 smax reduction of <vscale x 1024 x i1>: the assertions expect v0 and v8-v22 combined by 15 vmand.mm, then vmnot.m, vcpop.m, seqz
+; CHECK-LABEL: vreduce_smax_nxv1024i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmand.mm v14, v14, v22
+; CHECK-NEXT:    vmand.mm v10, v10, v18
+; CHECK-NEXT:    vmand.mm v12, v12, v20
+; CHECK-NEXT:    vmand.mm v8, v8, v16
+; CHECK-NEXT:    vmand.mm v13, v13, v21
+; CHECK-NEXT:    vmand.mm v9, v9, v17
+; CHECK-NEXT:    vmand.mm v11, v11, v19
+; CHECK-NEXT:    vmand.mm v15, v0, v15
+; CHECK-NEXT:    vmand.mm v10, v10, v14
+; CHECK-NEXT:    vmand.mm v8, v8, v12
+; CHECK-NEXT:    vmand.mm v9, v9, v13
+; CHECK-NEXT:    vmand.mm v11, v15, v11
+; CHECK-NEXT:    vmand.mm v8, v8, v10
+; CHECK-NEXT:    vmand.mm v9, v11, v9
+; CHECK-NEXT:    vmand.mm v8, v9, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.smax.nxv1024i1(<vscale x 1024 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.umin.nxv1024i1(<vscale x 1024 x i1>)
+
+define zeroext i1 @vreduce_umin_nxv1024i1(<vscale x 1024 x i1> %v) { ; i1 umin reduction of <vscale x 1024 x i1>: the assertions expect v0 and v8-v22 combined by 15 vmand.mm, then vmnot.m, vcpop.m, seqz
+; CHECK-LABEL: vreduce_umin_nxv1024i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmand.mm v14, v14, v22
+; CHECK-NEXT:    vmand.mm v10, v10, v18
+; CHECK-NEXT:    vmand.mm v12, v12, v20
+; CHECK-NEXT:    vmand.mm v8, v8, v16
+; CHECK-NEXT:    vmand.mm v13, v13, v21
+; CHECK-NEXT:    vmand.mm v9, v9, v17
+; CHECK-NEXT:    vmand.mm v11, v11, v19
+; CHECK-NEXT:    vmand.mm v15, v0, v15
+; CHECK-NEXT:    vmand.mm v10, v10, v14
+; CHECK-NEXT:    vmand.mm v8, v8, v12
+; CHECK-NEXT:    vmand.mm v9, v9, v13
+; CHECK-NEXT:    vmand.mm v11, v15, v11
+; CHECK-NEXT:    vmand.mm v8, v8, v10
+; CHECK-NEXT:    vmand.mm v9, v11, v9
+; CHECK-NEXT:    vmand.mm v8, v9, v8
+; CHECK-NEXT:    vmnot.m v8, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.umin.nxv1024i1(<vscale x 1024 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.smin.nxv1024i1(<vscale x 1024 x i1>)
+
+define zeroext i1 @vreduce_smin_nxv1024i1(<vscale x 1024 x i1> %v) { ; i1 smin reduction of <vscale x 1024 x i1>: the assertions expect v0 and v8-v22 combined by 15 vmor.mm, then vcpop.m, snez (no vmnot.m step)
+; CHECK-LABEL: vreduce_smin_nxv1024i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vmor.mm v14, v14, v22
+; CHECK-NEXT:    vmor.mm v10, v10, v18
+; CHECK-NEXT:    vmor.mm v12, v12, v20
+; CHECK-NEXT:    vmor.mm v8, v8, v16
+; CHECK-NEXT:    vmor.mm v13, v13, v21
+; CHECK-NEXT:    vmor.mm v9, v9, v17
+; CHECK-NEXT:    vmor.mm v11, v11, v19
+; CHECK-NEXT:    vmor.mm v15, v0, v15
+; CHECK-NEXT:    vmor.mm v10, v10, v14
+; CHECK-NEXT:    vmor.mm v8, v8, v12
+; CHECK-NEXT:    vmor.mm v9, v9, v13
+; CHECK-NEXT:    vmor.mm v11, v15, v11
+; CHECK-NEXT:    vmor.mm v8, v8, v10
+; CHECK-NEXT:    vmor.mm v9, v11, v9
+; CHECK-NEXT:    vmor.mm v8, v9, v8
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.smin.nxv1024i1(<vscale x 1024 x i1> %v)
+  ret i1 %red
+}

>From 97b0a4531b0be89b35dfaff227f24810e2a26132 Mon Sep 17 00:00:00 2001
From: Shao-Ce SUN <sunshaoce at outlook.com>
Date: Mon, 9 Dec 2024 11:33:19 +0800
Subject: [PATCH 3/3] fixup! Move the judgment for `v1i1` out of the scope of
 `AND`

---
 .../lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 4b5f5b0c8e047d..121efde9feac3a 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1536,15 +1536,16 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
   std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
   Type *ElementTy = Ty->getElementType();
   if (ElementTy->isIntegerTy(1)) {
+    // Example sequences:
+    //   vfirst.m a0, v0
+    //   seqz a0, a0
+    if (LT.second == MVT::v1i1)
+      return getRISCVInstructionCost(RISCV::VFIRST_M, LT.second, CostKind) +
+             getCmpSelInstrCost(Instruction::ICmp, ElementTy, ElementTy,
+                                CmpInst::ICMP_EQ, CostKind);
+
     if (ISD == ISD::AND) {
       // Example sequences:
-      //   vfirst.m a0, v0
-      //   seqz a0, a0
-      if (LT.second == MVT::v1i1)
-        return getRISCVInstructionCost(RISCV::VFIRST_M, LT.second, CostKind) +
-               getCmpSelInstrCost(Instruction::ICmp, ElementTy, ElementTy,
-                                  CmpInst::ICMP_EQ, CostKind);
-      // Example sequences:
       //   vsetvli a0, zero, e8, mf8, ta, ma
       //   vmand.mm v8, v9, v8 ; needed every time type is split
       //   vmnot.m v8, v0



More information about the llvm-commits mailing list