[llvm] e046c0c - [RISCV] Support scalable-vector integer reduction intrinsics

Fraser Cormack via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 5 02:16:49 PST 2021


Author: Fraser Cormack
Date: 2021-02-05T10:10:08Z
New Revision: e046c0c28b108e3799d139e11075a274dcc2ef22

URL: https://github.com/llvm/llvm-project/commit/e046c0c28b108e3799d139e11075a274dcc2ef22
DIFF: https://github.com/llvm/llvm-project/commit/e046c0c28b108e3799d139e11075a274dcc2ef22.diff

LOG: [RISCV] Support scalable-vector integer reduction intrinsics

This patch adds support for the integer reduction intrinsics that have
corresponding RVV instructions. This excludes "mul", which has no such
instruction.

The reduction instructions in RVV have slightly complicated type
constraints, given that they always produce a single "M1" (LMUL=1) vector
register regardless of the input operand's LMUL.

They are lowered to custom nodes that carry the second "scalar" reduction
operand, both to simplify the patterns and in the hope that the extra
operand proves useful for future DAG combines.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D95620

Added: 
    llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv32.ll
    llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv64.ll

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/lib/Target/RISCV/RISCVISelLowering.h
    llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
    llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
    llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 9e914881c020..9208f33d6714 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -377,6 +377,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::SPLAT_VECTOR, MVT::i64, Custom);
       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);
+
+      setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
+      setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
+      setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
+      setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
+      setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
+      setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
+      setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
+      setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
     }
 
     for (MVT VT : BoolVecVTs) {
@@ -418,6 +427,17 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       // Custom-lower insert/extract operations to simplify patterns.
       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+
+      // Custom-lower reduction operations to set up the corresponding custom
+      // nodes' operands.
+      setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
     }
 
     // Expand various CCs to best match the RVV ISA, which natively supports UNE
@@ -893,6 +913,15 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
 
     return Op;
   }
+  case ISD::VECREDUCE_ADD:
+  case ISD::VECREDUCE_UMAX:
+  case ISD::VECREDUCE_SMAX:
+  case ISD::VECREDUCE_UMIN:
+  case ISD::VECREDUCE_SMIN:
+  case ISD::VECREDUCE_AND:
+  case ISD::VECREDUCE_OR:
+  case ISD::VECREDUCE_XOR:
+    return lowerVECREDUCE(Op, DAG);
   }
 }
 
@@ -1615,6 +1644,60 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
   }
 }
 
+static std::pair<unsigned, uint64_t>
+getRVVReductionOpAndIdentityVal(unsigned ISDOpcode, unsigned EltSizeBits) {
+  switch (ISDOpcode) {
+  default:
+    llvm_unreachable("Unhandled reduction");
+  case ISD::VECREDUCE_ADD:
+    return {RISCVISD::VECREDUCE_ADD, 0};
+  case ISD::VECREDUCE_UMAX:
+    return {RISCVISD::VECREDUCE_UMAX, 0};
+  case ISD::VECREDUCE_SMAX:
+    return {RISCVISD::VECREDUCE_SMAX, minIntN(EltSizeBits)};
+  case ISD::VECREDUCE_UMIN:
+    return {RISCVISD::VECREDUCE_UMIN, maxUIntN(EltSizeBits)};
+  case ISD::VECREDUCE_SMIN:
+    return {RISCVISD::VECREDUCE_SMIN, maxIntN(EltSizeBits)};
+  case ISD::VECREDUCE_AND:
+    return {RISCVISD::VECREDUCE_AND, -1};
+  case ISD::VECREDUCE_OR:
+    return {RISCVISD::VECREDUCE_OR, 0};
+  case ISD::VECREDUCE_XOR:
+    return {RISCVISD::VECREDUCE_XOR, 0};
+  }
+}
+
+// Take a (supported) standard ISD reduction opcode and transform it to a RISCV
+// reduction opcode. Note that this returns a vector type, which must be
+// further processed to access the scalar result in element 0.
+SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
+                                            SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  assert(Op.getValueType().isSimple() &&
+         Op.getOperand(0).getValueType().isSimple() &&
+         "Unexpected vector-reduce lowering");
+  MVT VecEltVT = Op.getOperand(0).getSimpleValueType().getVectorElementType();
+  unsigned RVVOpcode;
+  uint64_t IdentityVal;
+  std::tie(RVVOpcode, IdentityVal) =
+      getRVVReductionOpAndIdentityVal(Op.getOpcode(), VecEltVT.getSizeInBits());
+  // We have to perform a bit of a dance to get from our vector type to the
+  // correct LMUL=1 vector type. We divide our minimum VLEN (64) by the vector
+  // element type to find the type which fills a single register. Be careful to
+  // use the operand's vector element type rather than the reduction's value
+  // type, as that has likely been extended to XLEN.
+  unsigned NumElts = 64 / VecEltVT.getSizeInBits();
+  MVT M1VT = MVT::getScalableVectorVT(VecEltVT, NumElts);
+  SDValue IdentitySplat =
+      DAG.getSplatVector(M1VT, DL, DAG.getConstant(IdentityVal, DL, VecEltVT));
+  SDValue Reduction =
+      DAG.getNode(RVVOpcode, DL, M1VT, Op.getOperand(0), IdentitySplat);
+  SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
+                             DAG.getConstant(0, DL, Subtarget.getXLenVT()));
+  return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
+}
+
 // Returns the opcode of the target-specific SDNode that implements the 32-bit
 // form of the given Opcode.
 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
@@ -1903,6 +1986,19 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
     }
     break;
   }
+  case ISD::VECREDUCE_ADD:
+  case ISD::VECREDUCE_AND:
+  case ISD::VECREDUCE_OR:
+  case ISD::VECREDUCE_XOR:
+  case ISD::VECREDUCE_SMAX:
+  case ISD::VECREDUCE_UMAX:
+  case ISD::VECREDUCE_SMIN:
+  case ISD::VECREDUCE_UMIN:
+    // The custom-lowering for these nodes returns a vector whose first element
+    // is the result of the reduction. Extract its first element and let the
+    // legalization for EXTRACT_VECTOR_ELT do the rest of the job.
+    Results.push_back(lowerVECREDUCE(SDValue(N, 0), DAG));
+    break;
   }
 }
 
@@ -4160,6 +4256,14 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(VSLIDEDOWN)
   NODE_NAME_CASE(VID)
   NODE_NAME_CASE(VFNCVT_ROD)
+  NODE_NAME_CASE(VECREDUCE_ADD)
+  NODE_NAME_CASE(VECREDUCE_UMAX)
+  NODE_NAME_CASE(VECREDUCE_SMAX)
+  NODE_NAME_CASE(VECREDUCE_UMIN)
+  NODE_NAME_CASE(VECREDUCE_SMIN)
+  NODE_NAME_CASE(VECREDUCE_AND)
+  NODE_NAME_CASE(VECREDUCE_OR)
+  NODE_NAME_CASE(VECREDUCE_XOR)
   }
   // clang-format on
   return nullptr;

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 82a3f90ac08d..d4d1372c4dd0 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -113,6 +113,24 @@ enum NodeType : unsigned {
   // float to single-width float, rounding towards odd). Takes a double-width
   // float vector and produces a single-width float vector.
   VFNCVT_ROD,
+  // These nodes match the semantics of the corresponding RVV vector reduction
+  // instructions. They produce a vector result which is the reduction
+  // performed over the first vector operand plus the first element of the
+  // second vector operand. The first operand is an unconstrained vector type,
+  // and the result and second operand's types are expected to be the
+  // corresponding full-width LMUL=1 type for the first operand:
+  //   nxv8i8 = vecreduce_add nxv32i8, nxv8i8
+  //   nxv2i32 = vecreduce_add nxv8i32, nxv2i32
+  // The difference in types does introduce extra vsetvli instructions but
+  // similarly it reduces the number of registers consumed per reduction.
+  VECREDUCE_ADD,
+  VECREDUCE_UMAX,
+  VECREDUCE_SMAX,
+  VECREDUCE_UMIN,
+  VECREDUCE_SMIN,
+  VECREDUCE_AND,
+  VECREDUCE_OR,
+  VECREDUCE_XOR,
 };
 } // namespace RISCVISD
 
@@ -314,6 +332,7 @@ class RISCVTargetLowering : public TargetLowering {
   SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
 
   bool isEligibleForTailCallOptimization(
       CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 76033ed567fe..8e0fd5dfa173 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -45,6 +45,13 @@ class SwapHelper<dag Prefix, dag A, dag B, dag Suffix, bit swap> {
    dag Value = !con(Prefix, !if(swap, B, A), !if(swap, A, B), Suffix);
 }
 
+def SDTRVVVecReduce : SDTypeProfile<1, 2, [
+  SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<0, 2>
+]>;
+
+foreach kind = ["ADD", "UMAX", "SMAX", "UMIN", "SMIN", "AND", "OR", "XOR"] in
+  def rvv_vecreduce_#kind : SDNode<"RISCVISD::VECREDUCE_"#kind, SDTRVVVecReduce>;
+
 multiclass VPatUSLoadStoreSDNode<LLVMType type,
                                  int sew,
                                  LMULInfo vlmul,
@@ -355,6 +362,18 @@ multiclass VPatNConvertFP2ISDNode_V<SDNode vop, string instruction_name> {
   }
 }
 
+multiclass VPatReductionSDNode<SDNode vop, string instruction_name> {
+  foreach vti = AllIntegerVectors in {
+    defvar vti_m1 = !cast<VTypeInfo>("VI" # vti.SEW # "M1");
+    def: Pat<(vti_m1.Vector (vop (vti.Vector vti.RegClass:$rs1), VR:$rs2)),
+        (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX)
+            (vti_m1.Vector (IMPLICIT_DEF)),
+            (vti.Vector vti.RegClass:$rs1),
+            (vti_m1.Vector VR:$rs2),
+            vti.AVL, vti.SEW)>;
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // Patterns.
 //===----------------------------------------------------------------------===//
@@ -475,6 +494,16 @@ foreach vti = AllIntegerVectors in {
                  vti.RegClass:$rs2, simm5:$rs1, VMV0:$vm, vti.AVL, vti.SEW)>;
 }
 
+// 15.1. Vector Single-Width Integer Reduction Instructions
+defm "" : VPatReductionSDNode<rvv_vecreduce_ADD,  "PseudoVREDSUM">;
+defm "" : VPatReductionSDNode<rvv_vecreduce_UMAX, "PseudoVREDMAXU">;
+defm "" : VPatReductionSDNode<rvv_vecreduce_SMAX, "PseudoVREDMAX">;
+defm "" : VPatReductionSDNode<rvv_vecreduce_UMIN, "PseudoVREDMINU">;
+defm "" : VPatReductionSDNode<rvv_vecreduce_SMIN, "PseudoVREDMIN">;
+defm "" : VPatReductionSDNode<rvv_vecreduce_AND,  "PseudoVREDAND">;
+defm "" : VPatReductionSDNode<rvv_vecreduce_OR,   "PseudoVREDOR">;
+defm "" : VPatReductionSDNode<rvv_vecreduce_XOR,  "PseudoVREDXOR">;
+
 // 16.1. Vector Mask-Register Logical Instructions
 foreach mti = AllMasks in {
   def : Pat<(mti.Mask (and VR:$rs1, VR:$rs2)),

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 27714cffc989..4ae0c7b97564 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -94,3 +94,19 @@ int RISCVTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
   // Prevent hoisting in unknown cases.
   return TTI::TCC_Free;
 }
+
+bool RISCVTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
+  // Currently, the ExpandReductions pass can't expand scalable-vector
+  // reductions, but we still request expansion as RVV doesn't support certain
+  // reductions and the SelectionDAG can't legalize them either.
+  switch (II->getIntrinsicID()) {
+  default:
+    return false;
+  case Intrinsic::vector_reduce_mul:
+  case Intrinsic::vector_reduce_fadd:
+  case Intrinsic::vector_reduce_fmul:
+  case Intrinsic::vector_reduce_fmax:
+  case Intrinsic::vector_reduce_fmin:
+    return true;
+  }
+}

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 8d077e946305..cf37dc9a0aea 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -47,6 +47,8 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
                         Instruction *Inst = nullptr);
   int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                           Type *Ty, TTI::TargetCostKind CostKind);
+
+  bool shouldExpandReduction(const IntrinsicInst *II) const;
 };
 
 } // end namespace llvm

diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv32.ll
new file mode 100644
index 000000000000..6b228061c841
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv32.ll
@@ -0,0 +1,1641 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+
+declare i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8>)
+
+define signext i8 @vreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
+; CHECK-LABEL: vreduce_add_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vredsum.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8>)
+
+define signext i8 @vreduce_umax_nxv1i8(<vscale x 1 x i8> %v) {
+; CHECK-LABEL: vreduce_umax_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8>)
+
+define signext i8 @vreduce_smax_nxv1i8(<vscale x 1 x i8> %v) {
+; CHECK-LABEL: vreduce_smax_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, zero, -128
+; CHECK-NEXT:    vsetvli a1, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8>)
+
+define signext i8 @vreduce_umin_nxv1i8(<vscale x 1 x i8> %v) {
+; CHECK-LABEL: vreduce_umin_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vredminu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8>)
+
+define signext i8 @vreduce_smin_nxv1i8(<vscale x 1 x i8> %v) {
+; CHECK-LABEL: vreduce_smin_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, zero, 127
+; CHECK-NEXT:    vsetvli a1, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8>)
+
+define signext i8 @vreduce_and_nxv1i8(<vscale x 1 x i8> %v) {
+; CHECK-LABEL: vreduce_and_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vredand.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8>)
+
+define signext i8 @vreduce_or_nxv1i8(<vscale x 1 x i8> %v) {
+; CHECK-LABEL: vreduce_or_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vredor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8>)
+
+define signext i8 @vreduce_xor_nxv1i8(<vscale x 1 x i8> %v) {
+; CHECK-LABEL: vreduce_xor_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vredxor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8>)
+
+define signext i8 @vreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
+; CHECK-LABEL: vreduce_add_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
+; CHECK-NEXT:    vredsum.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8>)
+
+define signext i8 @vreduce_umax_nxv2i8(<vscale x 2 x i8> %v) {
+; CHECK-LABEL: vreduce_umax_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
+; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8>)
+
+define signext i8 @vreduce_smax_nxv2i8(<vscale x 2 x i8> %v) {
+; CHECK-LABEL: vreduce_smax_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, zero, -128
+; CHECK-NEXT:    vsetvli a1, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
+; CHECK-NEXT:    vredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8>)
+
+define signext i8 @vreduce_umin_nxv2i8(<vscale x 2 x i8> %v) {
+; CHECK-LABEL: vreduce_umin_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
+; CHECK-NEXT:    vredminu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8>)
+
+define signext i8 @vreduce_smin_nxv2i8(<vscale x 2 x i8> %v) {
+; CHECK-LABEL: vreduce_smin_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, zero, 127
+; CHECK-NEXT:    vsetvli a1, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
+; CHECK-NEXT:    vredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8>)
+
+define signext i8 @vreduce_and_nxv2i8(<vscale x 2 x i8> %v) {
+; CHECK-LABEL: vreduce_and_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
+; CHECK-NEXT:    vredand.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8>)
+
+define signext i8 @vreduce_or_nxv2i8(<vscale x 2 x i8> %v) {
+; CHECK-LABEL: vreduce_or_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
+; CHECK-NEXT:    vredor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8>)
+
+define signext i8 @vreduce_xor_nxv2i8(<vscale x 2 x i8> %v) {
+; CHECK-LABEL: vreduce_xor_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
+; CHECK-NEXT:    vredxor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8>)
+
+define signext i8 @vreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
+; CHECK-LABEL: vreduce_add_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
+; CHECK-NEXT:    vredsum.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8>)
+
+define signext i8 @vreduce_umax_nxv4i8(<vscale x 4 x i8> %v) {
+; CHECK-LABEL: vreduce_umax_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
+; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8>)
+
+define signext i8 @vreduce_smax_nxv4i8(<vscale x 4 x i8> %v) {
+; CHECK-LABEL: vreduce_smax_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, zero, -128
+; CHECK-NEXT:    vsetvli a1, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
+; CHECK-NEXT:    vredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8>)
+
+define signext i8 @vreduce_umin_nxv4i8(<vscale x 4 x i8> %v) {
+; CHECK-LABEL: vreduce_umin_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
+; CHECK-NEXT:    vredminu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8>)
+
+define signext i8 @vreduce_smin_nxv4i8(<vscale x 4 x i8> %v) {
+; CHECK-LABEL: vreduce_smin_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, zero, 127
+; CHECK-NEXT:    vsetvli a1, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
+; CHECK-NEXT:    vredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8>)
+
+define signext i8 @vreduce_and_nxv4i8(<vscale x 4 x i8> %v) {
+; CHECK-LABEL: vreduce_and_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
+; CHECK-NEXT:    vredand.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8>)
+
+define signext i8 @vreduce_or_nxv4i8(<vscale x 4 x i8> %v) {
+; CHECK-LABEL: vreduce_or_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
+; CHECK-NEXT:    vredor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8>)
+
+define signext i8 @vreduce_xor_nxv4i8(<vscale x 4 x i8> %v) {
+; CHECK-LABEL: vreduce_xor_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
+; CHECK-NEXT:    vredxor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
+  ret i8 %red
+}
+
+declare i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16>)
+
+define signext i16 @vreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
+; CHECK-LABEL: vreduce_add_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vredsum.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16>)
+
+define signext i16 @vreduce_umax_nxv1i16(<vscale x 1 x i16> %v) {
+; CHECK-LABEL: vreduce_umax_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16>)
+
+define signext i16 @vreduce_smax_nxv1i16(<vscale x 1 x i16> %v) {
+; CHECK-LABEL: vreduce_smax_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 1048568
+; CHECK-NEXT:    vsetvli a1, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16>)
+
+define signext i16 @vreduce_umin_nxv1i16(<vscale x 1 x i16> %v) {
+; CHECK-LABEL: vreduce_umin_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vredminu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16>)
+
+define signext i16 @vreduce_smin_nxv1i16(<vscale x 1 x i16> %v) {
+; CHECK-LABEL: vreduce_smin_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 8
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16>)
+
+define signext i16 @vreduce_and_nxv1i16(<vscale x 1 x i16> %v) {
+; CHECK-LABEL: vreduce_and_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vredand.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16>)
+
+define signext i16 @vreduce_or_nxv1i16(<vscale x 1 x i16> %v) {
+; CHECK-LABEL: vreduce_or_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vredor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16>)
+
+define signext i16 @vreduce_xor_nxv1i16(<vscale x 1 x i16> %v) {
+; CHECK-LABEL: vreduce_xor_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vredxor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16>)
+
+define signext i16 @vreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
+; CHECK-LABEL: vreduce_add_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
+; CHECK-NEXT:    vredsum.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16>)
+
+define signext i16 @vreduce_umax_nxv2i16(<vscale x 2 x i16> %v) {
+; CHECK-LABEL: vreduce_umax_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
+; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16>)
+
+define signext i16 @vreduce_smax_nxv2i16(<vscale x 2 x i16> %v) {
+; CHECK-LABEL: vreduce_smax_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 1048568
+; CHECK-NEXT:    vsetvli a1, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
+; CHECK-NEXT:    vredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16>)
+
+define signext i16 @vreduce_umin_nxv2i16(<vscale x 2 x i16> %v) {
+; CHECK-LABEL: vreduce_umin_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
+; CHECK-NEXT:    vredminu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16>)
+
+define signext i16 @vreduce_smin_nxv2i16(<vscale x 2 x i16> %v) {
+; CHECK-LABEL: vreduce_smin_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 8
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
+; CHECK-NEXT:    vredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16>)
+
+define signext i16 @vreduce_and_nxv2i16(<vscale x 2 x i16> %v) {
+; CHECK-LABEL: vreduce_and_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
+; CHECK-NEXT:    vredand.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16>)
+
+define signext i16 @vreduce_or_nxv2i16(<vscale x 2 x i16> %v) {
+; CHECK-LABEL: vreduce_or_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
+; CHECK-NEXT:    vredor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16>)
+
+define signext i16 @vreduce_xor_nxv2i16(<vscale x 2 x i16> %v) {
+; CHECK-LABEL: vreduce_xor_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
+; CHECK-NEXT:    vredxor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16>)
+
+define signext i16 @vreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
+; CHECK-LABEL: vreduce_add_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vredsum.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16>)
+
+define signext i16 @vreduce_umax_nxv4i16(<vscale x 4 x i16> %v) {
+; CHECK-LABEL: vreduce_umax_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16>)
+
+define signext i16 @vreduce_smax_nxv4i16(<vscale x 4 x i16> %v) {
+; CHECK-LABEL: vreduce_smax_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 1048568
+; CHECK-NEXT:    vsetvli a1, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vredmax.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16>)
+
+define signext i16 @vreduce_umin_nxv4i16(<vscale x 4 x i16> %v) {
+; CHECK-LABEL: vreduce_umin_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vredminu.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16>)
+
+define signext i16 @vreduce_smin_nxv4i16(<vscale x 4 x i16> %v) {
+; CHECK-LABEL: vreduce_smin_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 8
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vredmin.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16>)
+
+define signext i16 @vreduce_and_nxv4i16(<vscale x 4 x i16> %v) {
+; CHECK-LABEL: vreduce_and_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vredand.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16>)
+
+define signext i16 @vreduce_or_nxv4i16(<vscale x 4 x i16> %v) {
+; CHECK-LABEL: vreduce_or_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vredor.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16>)
+
+define signext i16 @vreduce_xor_nxv4i16(<vscale x 4 x i16> %v) {
+; CHECK-LABEL: vreduce_xor_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vredxor.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
+  ret i16 %red
+}
+
+declare i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32>)
+
+define i32 @vreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
+; CHECK-LABEL: vreduce_add_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vredsum.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32>)
+
+define i32 @vreduce_umax_nxv1i32(<vscale x 1 x i32> %v) {
+; CHECK-LABEL: vreduce_umax_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32>)
+
+define i32 @vreduce_smax_nxv1i32(<vscale x 1 x i32> %v) {
+; CHECK-LABEL: vreduce_smax_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    vsetvli a1, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32>)
+
+define i32 @vreduce_umin_nxv1i32(<vscale x 1 x i32> %v) {
+; CHECK-LABEL: vreduce_umin_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vredminu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32>)
+
+define i32 @vreduce_smin_nxv1i32(<vscale x 1 x i32> %v) {
+; CHECK-LABEL: vreduce_smin_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32>)
+
+define i32 @vreduce_and_nxv1i32(<vscale x 1 x i32> %v) {
+; CHECK-LABEL: vreduce_and_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vredand.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32>)
+
+define i32 @vreduce_or_nxv1i32(<vscale x 1 x i32> %v) {
+; CHECK-LABEL: vreduce_or_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vredor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32>)
+
+define i32 @vreduce_xor_nxv1i32(<vscale x 1 x i32> %v) {
+; CHECK-LABEL: vreduce_xor_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vredxor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32>)
+
+define i32 @vreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
+; CHECK-LABEL: vreduce_add_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vredsum.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32>)
+
+define i32 @vreduce_umax_nxv2i32(<vscale x 2 x i32> %v) {
+; CHECK-LABEL: vreduce_umax_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32>)
+
+define i32 @vreduce_smax_nxv2i32(<vscale x 2 x i32> %v) {
+; CHECK-LABEL: vreduce_smax_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    vsetvli a1, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vredmax.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32>)
+
+define i32 @vreduce_umin_nxv2i32(<vscale x 2 x i32> %v) {
+; CHECK-LABEL: vreduce_umin_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vredminu.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32>)
+
+define i32 @vreduce_smin_nxv2i32(<vscale x 2 x i32> %v) {
+; CHECK-LABEL: vreduce_smin_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vredmin.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32>)
+
+define i32 @vreduce_and_nxv2i32(<vscale x 2 x i32> %v) {
+; CHECK-LABEL: vreduce_and_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vredand.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32>)
+
+define i32 @vreduce_or_nxv2i32(<vscale x 2 x i32> %v) {
+; CHECK-LABEL: vreduce_or_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vredor.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32>)
+
+define i32 @vreduce_xor_nxv2i32(<vscale x 2 x i32> %v) {
+; CHECK-LABEL: vreduce_xor_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vredxor.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)
+
+define i32 @vreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: vreduce_add_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT:    vredsum.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32>)
+
+define i32 @vreduce_umax_nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: vreduce_umax_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32>)
+
+define i32 @vreduce_smax_nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: vreduce_smax_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    vsetvli a1, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT:    vredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32>)
+
+define i32 @vreduce_umin_nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: vreduce_umin_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT:    vredminu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32>)
+
+define i32 @vreduce_smin_nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: vreduce_smin_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT:    vredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32>)
+
+define i32 @vreduce_and_nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: vreduce_and_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT:    vredand.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32>)
+
+define i32 @vreduce_or_nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: vreduce_or_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT:    vredor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32>)
+
+define i32 @vreduce_xor_nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: vreduce_xor_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT:    vredxor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
+  ret i32 %red
+}
+
+declare i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64>)
+
+define i64 @vreduce_add_nxv1i64(<vscale x 1 x i64> %v) {
+; CHECK-LABEL: vreduce_add_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vredsum.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    addi a1, zero, 32
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    vmv.x.s a1, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64>)
+
+define i64 @vreduce_umax_nxv1i64(<vscale x 1 x i64> %v) {
+; CHECK-LABEL: vreduce_umax_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    addi a1, zero, 32
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    vmv.x.s a1, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64>)
+
+define i64 @vreduce_smax_nxv1i64(<vscale x 1 x i64> %v) {
+; CHECK-LABEL: vreduce_smax_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    vsetvli a1, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    addi a1, zero, 32
+; CHECK-NEXT:    vsll.vx v25, v25, a1
+; CHECK-NEXT:    vmv.v.i v26, 0
+; CHECK-NEXT:    vsll.vx v26, v26, a1
+; CHECK-NEXT:    vsrl.vx v26, v26, a1
+; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    vredmax.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    vmv.x.s a1, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64>)
+
+define i64 @vreduce_umin_nxv1i64(<vscale x 1 x i64> %v) {
+; CHECK-LABEL: vreduce_umin_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vredminu.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    addi a1, zero, 32
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    vmv.x.s a1, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64>)
+
+define i64 @vreduce_smin_nxv1i64(<vscale x 1 x i64> %v) {
+; CHECK-LABEL: vreduce_smin_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    addi a1, zero, 32
+; CHECK-NEXT:    vsll.vx v25, v25, a1
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vmv.v.x v26, a0
+; CHECK-NEXT:    vsll.vx v26, v26, a1
+; CHECK-NEXT:    vor.vv v25, v25, v26
+; CHECK-NEXT:    vredmin.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    vmv.x.s a1, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64>)
+
+define i64 @vreduce_and_nxv1i64(<vscale x 1 x i64> %v) {
+; CHECK-LABEL: vreduce_and_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vredand.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    addi a1, zero, 32
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    vmv.x.s a1, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64>)
+
+define i64 @vreduce_or_nxv1i64(<vscale x 1 x i64> %v) {
+; CHECK-LABEL: vreduce_or_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vredor.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    addi a1, zero, 32
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    vmv.x.s a1, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64>)
+
+define i64 @vreduce_xor_nxv1i64(<vscale x 1 x i64> %v) {
+; CHECK-LABEL: vreduce_xor_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vredxor.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    addi a1, zero, 32
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    vmv.x.s a1, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64>)
+
+define i64 @vreduce_add_nxv2i64(<vscale x 2 x i64> %v) {
+; CHECK-LABEL: vreduce_add_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT:    vredsum.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    addi a1, zero, 32
+; CHECK-NEXT:    vsetvli a2, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    vmv.x.s a1, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64>)
+
+define i64 @vreduce_umax_nxv2i64(<vscale x 2 x i64> %v) {
+; CHECK-LABEL: vreduce_umax_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    addi a1, zero, 32
+; CHECK-NEXT:    vsetvli a2, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    vmv.x.s a1, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64>)
+
+define i64 @vreduce_smax_nxv2i64(<vscale x 2 x i64> %v) {
+; CHECK-LABEL: vreduce_smax_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    vsetvli a1, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    addi a1, zero, 32
+; CHECK-NEXT:    vsll.vx v25, v25, a1
+; CHECK-NEXT:    vmv.v.i v26, 0
+; CHECK-NEXT:    vsll.vx v26, v26, a1
+; CHECK-NEXT:    vsrl.vx v26, v26, a1
+; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT:    vredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    vsetvli a2, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    vmv.x.s a1, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64>)
+
+define i64 @vreduce_umin_nxv2i64(<vscale x 2 x i64> %v) {
+; CHECK-LABEL: vreduce_umin_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT:    vredminu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    addi a1, zero, 32
+; CHECK-NEXT:    vsetvli a2, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    vmv.x.s a1, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64>)
+
+define i64 @vreduce_smin_nxv2i64(<vscale x 2 x i64> %v) {
+; CHECK-LABEL: vreduce_smin_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    addi a1, zero, 32
+; CHECK-NEXT:    vsll.vx v25, v25, a1
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vmv.v.x v26, a0
+; CHECK-NEXT:    vsll.vx v26, v26, a1
+; CHECK-NEXT:    vor.vv v25, v25, v26
+; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT:    vredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    vsetvli a2, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    vmv.x.s a1, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64>)
+
+define i64 @vreduce_and_nxv2i64(<vscale x 2 x i64> %v) {
+; CHECK-LABEL: vreduce_and_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT:    vredand.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    addi a1, zero, 32
+; CHECK-NEXT:    vsetvli a2, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    vmv.x.s a1, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64>)
+
+define i64 @vreduce_or_nxv2i64(<vscale x 2 x i64> %v) {
+; CHECK-LABEL: vreduce_or_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT:    vredor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    addi a1, zero, 32
+; CHECK-NEXT:    vsetvli a2, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    vmv.x.s a1, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64>)
+
+define i64 @vreduce_xor_nxv2i64(<vscale x 2 x i64> %v) {
+; CHECK-LABEL: vreduce_xor_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT:    vredxor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    addi a1, zero, 32
+; CHECK-NEXT:    vsetvli a2, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    vmv.x.s a1, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64>)
+
+define i64 @vreduce_add_nxv4i64(<vscale x 4 x i64> %v) {
+; CHECK-LABEL: vreduce_add_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT:    vredsum.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    addi a1, zero, 32
+; CHECK-NEXT:    vsetvli a2, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    vmv.x.s a1, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64>)
+
+define i64 @vreduce_umax_nxv4i64(<vscale x 4 x i64> %v) {
+; CHECK-LABEL: vreduce_umax_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    addi a1, zero, 32
+; CHECK-NEXT:    vsetvli a2, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    vmv.x.s a1, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64>)
+
+define i64 @vreduce_smax_nxv4i64(<vscale x 4 x i64> %v) {
+; CHECK-LABEL: vreduce_smax_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    vsetvli a1, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    addi a1, zero, 32
+; CHECK-NEXT:    vsll.vx v25, v25, a1
+; CHECK-NEXT:    vmv.v.i v26, 0
+; CHECK-NEXT:    vsll.vx v26, v26, a1
+; CHECK-NEXT:    vsrl.vx v26, v26, a1
+; CHECK-NEXT:    vor.vv v25, v26, v25
+; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT:    vredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    vsetvli a2, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    vmv.x.s a1, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64>)
+
+define i64 @vreduce_umin_nxv4i64(<vscale x 4 x i64> %v) {
+; CHECK-LABEL: vreduce_umin_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT:    vredminu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    addi a1, zero, 32
+; CHECK-NEXT:    vsetvli a2, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    vmv.x.s a1, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64>)
+
+define i64 @vreduce_smin_nxv4i64(<vscale x 4 x i64> %v) {
+; CHECK-LABEL: vreduce_smin_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    addi a1, zero, 32
+; CHECK-NEXT:    vsll.vx v25, v25, a1
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vmv.v.x v26, a0
+; CHECK-NEXT:    vsll.vx v26, v26, a1
+; CHECK-NEXT:    vor.vv v25, v25, v26
+; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT:    vredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    vsetvli a2, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    vmv.x.s a1, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64>)
+
+define i64 @vreduce_and_nxv4i64(<vscale x 4 x i64> %v) {
+; CHECK-LABEL: vreduce_and_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT:    vredand.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    addi a1, zero, 32
+; CHECK-NEXT:    vsetvli a2, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    vmv.x.s a1, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64>)
+
+define i64 @vreduce_or_nxv4i64(<vscale x 4 x i64> %v) {
+; CHECK-LABEL: vreduce_or_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT:    vredor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    addi a1, zero, 32
+; CHECK-NEXT:    vsetvli a2, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    vmv.x.s a1, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64>)
+
+define i64 @vreduce_xor_nxv4i64(<vscale x 4 x i64> %v) {
+; CHECK-LABEL: vreduce_xor_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT:    vredxor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    addi a1, zero, 32
+; CHECK-NEXT:    vsetvli a2, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vsrl.vx v25, v25, a1
+; CHECK-NEXT:    vmv.x.s a1, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
+  ret i64 %red
+}

diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv64.ll
new file mode 100644
index 000000000000..a541624c11cc
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv64.ll
@@ -0,0 +1,1529 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+
+declare i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8>)
+
+define signext i8 @vreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
+; CHECK-LABEL: vreduce_add_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vredsum.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8>)
+
+define signext i8 @vreduce_umax_nxv1i8(<vscale x 1 x i8> %v) {
+; CHECK-LABEL: vreduce_umax_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8>)
+
+define signext i8 @vreduce_smax_nxv1i8(<vscale x 1 x i8> %v) {
+; CHECK-LABEL: vreduce_smax_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, zero, -128
+; CHECK-NEXT:    vsetvli a1, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8>)
+
+define signext i8 @vreduce_umin_nxv1i8(<vscale x 1 x i8> %v) {
+; CHECK-LABEL: vreduce_umin_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vredminu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8>)
+
+define signext i8 @vreduce_smin_nxv1i8(<vscale x 1 x i8> %v) {
+; CHECK-LABEL: vreduce_smin_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, zero, 127
+; CHECK-NEXT:    vsetvli a1, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8>)
+
+define signext i8 @vreduce_and_nxv1i8(<vscale x 1 x i8> %v) {
+; CHECK-LABEL: vreduce_and_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vredand.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8>)
+
+define signext i8 @vreduce_or_nxv1i8(<vscale x 1 x i8> %v) {
+; CHECK-LABEL: vreduce_or_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vredor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8>)
+
+define signext i8 @vreduce_xor_nxv1i8(<vscale x 1 x i8> %v) {
+; CHECK-LABEL: vreduce_xor_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vredxor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8>)
+
+define signext i8 @vreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
+; CHECK-LABEL: vreduce_add_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
+; CHECK-NEXT:    vredsum.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8>)
+
+define signext i8 @vreduce_umax_nxv2i8(<vscale x 2 x i8> %v) {
+; CHECK-LABEL: vreduce_umax_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
+; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8>)
+
+define signext i8 @vreduce_smax_nxv2i8(<vscale x 2 x i8> %v) {
+; CHECK-LABEL: vreduce_smax_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, zero, -128
+; CHECK-NEXT:    vsetvli a1, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
+; CHECK-NEXT:    vredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8>)
+
+define signext i8 @vreduce_umin_nxv2i8(<vscale x 2 x i8> %v) {
+; CHECK-LABEL: vreduce_umin_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
+; CHECK-NEXT:    vredminu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8>)
+
+define signext i8 @vreduce_smin_nxv2i8(<vscale x 2 x i8> %v) {
+; CHECK-LABEL: vreduce_smin_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, zero, 127
+; CHECK-NEXT:    vsetvli a1, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
+; CHECK-NEXT:    vredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8>)
+
+define signext i8 @vreduce_and_nxv2i8(<vscale x 2 x i8> %v) {
+; CHECK-LABEL: vreduce_and_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
+; CHECK-NEXT:    vredand.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8>)
+
+define signext i8 @vreduce_or_nxv2i8(<vscale x 2 x i8> %v) {
+; CHECK-LABEL: vreduce_or_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
+; CHECK-NEXT:    vredor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8>)
+
+define signext i8 @vreduce_xor_nxv2i8(<vscale x 2 x i8> %v) {
+; CHECK-LABEL: vreduce_xor_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
+; CHECK-NEXT:    vredxor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8>)
+
+define signext i8 @vreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
+; CHECK-LABEL: vreduce_add_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
+; CHECK-NEXT:    vredsum.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8>)
+
+define signext i8 @vreduce_umax_nxv4i8(<vscale x 4 x i8> %v) {
+; CHECK-LABEL: vreduce_umax_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
+; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8>)
+
+define signext i8 @vreduce_smax_nxv4i8(<vscale x 4 x i8> %v) {
+; CHECK-LABEL: vreduce_smax_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, zero, -128
+; CHECK-NEXT:    vsetvli a1, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
+; CHECK-NEXT:    vredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8>)
+
+define signext i8 @vreduce_umin_nxv4i8(<vscale x 4 x i8> %v) {
+; CHECK-LABEL: vreduce_umin_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
+; CHECK-NEXT:    vredminu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8>)
+
+define signext i8 @vreduce_smin_nxv4i8(<vscale x 4 x i8> %v) {
+; CHECK-LABEL: vreduce_smin_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, zero, 127
+; CHECK-NEXT:    vsetvli a1, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
+; CHECK-NEXT:    vredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8>)
+
+define signext i8 @vreduce_and_nxv4i8(<vscale x 4 x i8> %v) {
+; CHECK-LABEL: vreduce_and_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
+; CHECK-NEXT:    vredand.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8>)
+
+define signext i8 @vreduce_or_nxv4i8(<vscale x 4 x i8> %v) {
+; CHECK-LABEL: vreduce_or_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
+; CHECK-NEXT:    vredor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
+  ret i8 %red
+}
+
+declare i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8>)
+
+define signext i8 @vreduce_xor_nxv4i8(<vscale x 4 x i8> %v) {
+; CHECK-LABEL: vreduce_xor_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
+; CHECK-NEXT:    vredxor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
+  ret i8 %red
+}
+
+declare i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16>)
+
+define signext i16 @vreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
+; CHECK-LABEL: vreduce_add_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vredsum.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16>)
+
+define signext i16 @vreduce_umax_nxv1i16(<vscale x 1 x i16> %v) {
+; CHECK-LABEL: vreduce_umax_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16>)
+
+define signext i16 @vreduce_smax_nxv1i16(<vscale x 1 x i16> %v) {
+; CHECK-LABEL: vreduce_smax_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 1048568
+; CHECK-NEXT:    vsetvli a1, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16>)
+
+define signext i16 @vreduce_umin_nxv1i16(<vscale x 1 x i16> %v) {
+; CHECK-LABEL: vreduce_umin_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vredminu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16>)
+
+define signext i16 @vreduce_smin_nxv1i16(<vscale x 1 x i16> %v) {
+; CHECK-LABEL: vreduce_smin_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 8
+; CHECK-NEXT:    addiw a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16>)
+
+define signext i16 @vreduce_and_nxv1i16(<vscale x 1 x i16> %v) {
+; CHECK-LABEL: vreduce_and_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vredand.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16>)
+
+define signext i16 @vreduce_or_nxv1i16(<vscale x 1 x i16> %v) {
+; CHECK-LABEL: vreduce_or_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vredor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16>)
+
+define signext i16 @vreduce_xor_nxv1i16(<vscale x 1 x i16> %v) {
+; CHECK-LABEL: vreduce_xor_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vredxor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16>)
+
+define signext i16 @vreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
+; CHECK-LABEL: vreduce_add_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
+; CHECK-NEXT:    vredsum.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16>)
+
+define signext i16 @vreduce_umax_nxv2i16(<vscale x 2 x i16> %v) {
+; CHECK-LABEL: vreduce_umax_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
+; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16>)
+
+define signext i16 @vreduce_smax_nxv2i16(<vscale x 2 x i16> %v) {
+; CHECK-LABEL: vreduce_smax_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 1048568
+; CHECK-NEXT:    vsetvli a1, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
+; CHECK-NEXT:    vredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16>)
+
+define signext i16 @vreduce_umin_nxv2i16(<vscale x 2 x i16> %v) {
+; CHECK-LABEL: vreduce_umin_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
+; CHECK-NEXT:    vredminu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16>)
+
+define signext i16 @vreduce_smin_nxv2i16(<vscale x 2 x i16> %v) {
+; CHECK-LABEL: vreduce_smin_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 8
+; CHECK-NEXT:    addiw a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
+; CHECK-NEXT:    vredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16>)
+
+define signext i16 @vreduce_and_nxv2i16(<vscale x 2 x i16> %v) {
+; CHECK-LABEL: vreduce_and_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
+; CHECK-NEXT:    vredand.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16>)
+
+define signext i16 @vreduce_or_nxv2i16(<vscale x 2 x i16> %v) {
+; CHECK-LABEL: vreduce_or_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
+; CHECK-NEXT:    vredor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16>)
+
+define signext i16 @vreduce_xor_nxv2i16(<vscale x 2 x i16> %v) {
+; CHECK-LABEL: vreduce_xor_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
+; CHECK-NEXT:    vredxor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16>)
+
+define signext i16 @vreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
+; CHECK-LABEL: vreduce_add_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vredsum.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16>)
+
+define signext i16 @vreduce_umax_nxv4i16(<vscale x 4 x i16> %v) {
+; CHECK-LABEL: vreduce_umax_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16>)
+
+define signext i16 @vreduce_smax_nxv4i16(<vscale x 4 x i16> %v) {
+; CHECK-LABEL: vreduce_smax_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 1048568
+; CHECK-NEXT:    vsetvli a1, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vredmax.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16>)
+
+define signext i16 @vreduce_umin_nxv4i16(<vscale x 4 x i16> %v) {
+; CHECK-LABEL: vreduce_umin_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vredminu.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16>)
+
+define signext i16 @vreduce_smin_nxv4i16(<vscale x 4 x i16> %v) {
+; CHECK-LABEL: vreduce_smin_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 8
+; CHECK-NEXT:    addiw a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vredmin.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16>)
+
+define signext i16 @vreduce_and_nxv4i16(<vscale x 4 x i16> %v) {
+; CHECK-LABEL: vreduce_and_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vredand.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16>)
+
+define signext i16 @vreduce_or_nxv4i16(<vscale x 4 x i16> %v) {
+; CHECK-LABEL: vreduce_or_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vredor.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
+  ret i16 %red
+}
+
+declare i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16>)
+
+define signext i16 @vreduce_xor_nxv4i16(<vscale x 4 x i16> %v) {
+; CHECK-LABEL: vreduce_xor_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vredxor.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
+  ret i16 %red
+}
+
+declare i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32>)
+
+define signext i32 @vreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
+; CHECK-LABEL: vreduce_add_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vredsum.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32>)
+
+define signext i32 @vreduce_umax_nxv1i32(<vscale x 1 x i32> %v) {
+; CHECK-LABEL: vreduce_umax_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32>)
+
+define signext i32 @vreduce_smax_nxv1i32(<vscale x 1 x i32> %v) {
+; CHECK-LABEL: vreduce_smax_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    vsetvli a1, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32>)
+
+define signext i32 @vreduce_umin_nxv1i32(<vscale x 1 x i32> %v) {
+; CHECK-LABEL: vreduce_umin_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vredminu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32>)
+
+define signext i32 @vreduce_smin_nxv1i32(<vscale x 1 x i32> %v) {
+; CHECK-LABEL: vreduce_smin_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    addiw a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32>)
+
+define signext i32 @vreduce_and_nxv1i32(<vscale x 1 x i32> %v) {
+; CHECK-LABEL: vreduce_and_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vredand.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32>)
+
+define signext i32 @vreduce_or_nxv1i32(<vscale x 1 x i32> %v) {
+; CHECK-LABEL: vreduce_or_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vredor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32>)
+
+define signext i32 @vreduce_xor_nxv1i32(<vscale x 1 x i32> %v) {
+; CHECK-LABEL: vreduce_xor_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vredxor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32>)
+
+define signext i32 @vreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
+; CHECK-LABEL: vreduce_add_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vredsum.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32>)
+
+define signext i32 @vreduce_umax_nxv2i32(<vscale x 2 x i32> %v) {
+; CHECK-LABEL: vreduce_umax_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32>)
+
+define signext i32 @vreduce_smax_nxv2i32(<vscale x 2 x i32> %v) {
+; CHECK-LABEL: vreduce_smax_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    vsetvli a1, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vredmax.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32>)
+
+define signext i32 @vreduce_umin_nxv2i32(<vscale x 2 x i32> %v) {
+; CHECK-LABEL: vreduce_umin_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vredminu.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32>)
+
+define signext i32 @vreduce_smin_nxv2i32(<vscale x 2 x i32> %v) {
+; CHECK-LABEL: vreduce_smin_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    addiw a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vredmin.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32>)
+
+define signext i32 @vreduce_and_nxv2i32(<vscale x 2 x i32> %v) {
+; CHECK-LABEL: vreduce_and_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vredand.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32>)
+
+define signext i32 @vreduce_or_nxv2i32(<vscale x 2 x i32> %v) {
+; CHECK-LABEL: vreduce_or_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vredor.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32>)
+
+define signext i32 @vreduce_xor_nxv2i32(<vscale x 2 x i32> %v) {
+; CHECK-LABEL: vreduce_xor_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vredxor.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)
+
+define signext i32 @vreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: vreduce_add_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT:    vredsum.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32>)
+
+define signext i32 @vreduce_umax_nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: vreduce_umax_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32>)
+
+define signext i32 @vreduce_smax_nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: vreduce_smax_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    vsetvli a1, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT:    vredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32>)
+
+define signext i32 @vreduce_umin_nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: vreduce_umin_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT:    vredminu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32>)
+
+define signext i32 @vreduce_smin_nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: vreduce_smin_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    addiw a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT:    vredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32>)
+
+define signext i32 @vreduce_and_nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: vreduce_and_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT:    vredand.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32>)
+
+define signext i32 @vreduce_or_nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: vreduce_or_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT:    vredor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
+  ret i32 %red
+}
+
+declare i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32>)
+
+define signext i32 @vreduce_xor_nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: vreduce_xor_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT:    vredxor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
+  ret i32 %red
+}
+
+declare i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64>)
+
+define i64 @vreduce_add_nxv1i64(<vscale x 1 x i64> %v) {
+; CHECK-LABEL: vreduce_add_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vredsum.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64>)
+
+define i64 @vreduce_umax_nxv1i64(<vscale x 1 x i64> %v) {
+; CHECK-LABEL: vreduce_umax_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64>)
+
+define i64 @vreduce_smax_nxv1i64(<vscale x 1 x i64> %v) {
+; CHECK-LABEL: vreduce_smax_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, zero, -1
+; CHECK-NEXT:    slli a0, a0, 63
+; CHECK-NEXT:    vsetvli a1, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vredmax.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64>)
+
+define i64 @vreduce_umin_nxv1i64(<vscale x 1 x i64> %v) {
+; CHECK-LABEL: vreduce_umin_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vredminu.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64>)
+
+define i64 @vreduce_smin_nxv1i64(<vscale x 1 x i64> %v) {
+; CHECK-LABEL: vreduce_smin_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, zero, -1
+; CHECK-NEXT:    slli a0, a0, 63
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vredmin.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64>)
+
+define i64 @vreduce_and_nxv1i64(<vscale x 1 x i64> %v) {
+; CHECK-LABEL: vreduce_and_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vredand.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64>)
+
+define i64 @vreduce_or_nxv1i64(<vscale x 1 x i64> %v) {
+; CHECK-LABEL: vreduce_or_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vredor.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64>)
+
+define i64 @vreduce_xor_nxv1i64(<vscale x 1 x i64> %v) {
+; CHECK-LABEL: vreduce_xor_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vredxor.vs v25, v8, v25
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64>)
+
+define i64 @vreduce_add_nxv2i64(<vscale x 2 x i64> %v) {
+; CHECK-LABEL: vreduce_add_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT:    vredsum.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64>)
+
+define i64 @vreduce_umax_nxv2i64(<vscale x 2 x i64> %v) {
+; CHECK-LABEL: vreduce_umax_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64>)
+
+define i64 @vreduce_smax_nxv2i64(<vscale x 2 x i64> %v) {
+; CHECK-LABEL: vreduce_smax_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, zero, -1
+; CHECK-NEXT:    slli a0, a0, 63
+; CHECK-NEXT:    vsetvli a1, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT:    vredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64>)
+
+define i64 @vreduce_umin_nxv2i64(<vscale x 2 x i64> %v) {
+; CHECK-LABEL: vreduce_umin_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT:    vredminu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64>)
+
+define i64 @vreduce_smin_nxv2i64(<vscale x 2 x i64> %v) {
+; CHECK-LABEL: vreduce_smin_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, zero, -1
+; CHECK-NEXT:    slli a0, a0, 63
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT:    vredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64>)
+
+define i64 @vreduce_and_nxv2i64(<vscale x 2 x i64> %v) {
+; CHECK-LABEL: vreduce_and_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT:    vredand.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64>)
+
+define i64 @vreduce_or_nxv2i64(<vscale x 2 x i64> %v) {
+; CHECK-LABEL: vreduce_or_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT:    vredor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64>)
+
+define i64 @vreduce_xor_nxv2i64(<vscale x 2 x i64> %v) {
+; CHECK-LABEL: vreduce_xor_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT:    vredxor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64>)
+
+define i64 @vreduce_add_nxv4i64(<vscale x 4 x i64> %v) {
+; CHECK-LABEL: vreduce_add_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT:    vredsum.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64>)
+
+define i64 @vreduce_umax_nxv4i64(<vscale x 4 x i64> %v) {
+; CHECK-LABEL: vreduce_umax_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT:    vredmaxu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64>)
+
+define i64 @vreduce_smax_nxv4i64(<vscale x 4 x i64> %v) {
+; CHECK-LABEL: vreduce_smax_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, zero, -1
+; CHECK-NEXT:    slli a0, a0, 63
+; CHECK-NEXT:    vsetvli a1, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT:    vredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64>)
+
+define i64 @vreduce_umin_nxv4i64(<vscale x 4 x i64> %v) {
+; CHECK-LABEL: vreduce_umin_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT:    vredminu.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64>)
+
+define i64 @vreduce_smin_nxv4i64(<vscale x 4 x i64> %v) {
+; CHECK-LABEL: vreduce_smin_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, zero, -1
+; CHECK-NEXT:    slli a0, a0, 63
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT:    vredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64>)
+
+define i64 @vreduce_and_nxv4i64(<vscale x 4 x i64> %v) {
+; CHECK-LABEL: vreduce_and_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, -1
+; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT:    vredand.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64>)
+
+define i64 @vreduce_or_nxv4i64(<vscale x 4 x i64> %v) {
+; CHECK-LABEL: vreduce_or_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT:    vredor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
+  ret i64 %red
+}
+
+declare i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64>)
+
+define i64 @vreduce_xor_nxv4i64(<vscale x 4 x i64> %v) {
+; CHECK-LABEL: vreduce_xor_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT:    vredxor.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    ret
+  %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
+  ret i64 %red
+}
