[llvm] ae7c664 - [RISCV] Add basic code modeling for fixed length vector reduction.

Sun Mar 13 20:04:37 PDT 2022

Author: Yeting Kuo
Date: 2022-03-14T11:04:31+08:00
New Revision: ae7c6647f3ea81fc4447a76d824fea4b93862a4e

URL: https://github.com/llvm/llvm-project/commit/ae7c6647f3ea81fc4447a76d824fea4b93862a4e
DIFF: https://github.com/llvm/llvm-project/commit/ae7c6647f3ea81fc4447a76d824fea4b93862a4e.diff

LOG: [RISCV] Add basic code modeling for fixed length vector reduction.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D121447

Added: 
    llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
    llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
    llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
    llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
    llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll

Modified: 
    llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
    llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
    llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
    llvm/test/Analysis/CostModel/RISCV/reduce-or.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index e164264851a3e..941f97c62fe9e 100644

--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -217,6 +217,64 @@ InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
   return NumLoads * MemOpCost;
 }
 
+InstructionCost
+RISCVTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+                                     bool IsUnsigned,
+                                     TTI::TargetCostKind CostKind) {
+  // FIXME: Only supporting fixed vectors for now.
+  if (!isa<FixedVectorType>(Ty))
+    return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
+
+  if (!ST->useRVVForFixedLengthVectors())
+    return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
+
+  // Skip if scalar size of Ty is bigger than ELEN.
+  if (Ty->getScalarSizeInBits() > ST->getMaxELENForFixedLengthVectors())
+    return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
+
+  // IR Reduction is composed by two vmv and one rvv reduction instruction.
+  InstructionCost BaseCost = 2;
+  unsigned VL = cast<FixedVectorType>(Ty)->getNumElements();
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+  return (LT.first - 1) + BaseCost + Log2_32_Ceil(VL);
+}
+
+InstructionCost
+RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *VTy,
+                                         Optional<FastMathFlags> FMF,
+                                         TTI::TargetCostKind CostKind) {
+  // FIXME: Only supporting fixed vectors for now.
+  if (!isa<FixedVectorType>(VTy))
+    return BaseT::getArithmeticReductionCost(Opcode, VTy, FMF, CostKind);
+
+  // FIXME: Do not support i1 and/or reduction now.
+  if (VTy->getElementType()->isIntegerTy(1))
+    return BaseT::getArithmeticReductionCost(Opcode, VTy, FMF, CostKind);
+
+  if (!ST->useRVVForFixedLengthVectors())
+    return BaseT::getArithmeticReductionCost(Opcode, VTy, FMF, CostKind);
+
+  // Skip if scalar size of VTy is bigger than ELEN.
+  if (VTy->getScalarSizeInBits() > ST->getMaxELENForFixedLengthVectors())
+    return BaseT::getArithmeticReductionCost(Opcode, VTy, FMF, CostKind);
+
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+
+  if (ISD != ISD::ADD && ISD != ISD::OR && ISD != ISD::XOR && ISD != ISD::AND &&
+      ISD != ISD::FADD)
+    return BaseT::getArithmeticReductionCost(Opcode, VTy, FMF, CostKind);
+
+  // IR Reduction is composed by two vmv and one rvv reduction instruction.
+  InstructionCost BaseCost = 2;
+  unsigned VL = cast<FixedVectorType>(VTy)->getNumElements();
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VTy);
+
+  if (TTI::requiresOrderedReduction(FMF))
+    return (LT.first - 1) + BaseCost + VL;
+  return (LT.first - 1) + BaseCost + Log2_32_Ceil(VL);
+}
+
 void RISCVTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                            TTI::UnrollingPreferences &UP,
                                            OptimizationRemarkEmitter *ORE) {

diff  --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index f973b0537baf7..7f63d6ba29c47 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -88,6 +88,14 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
                                          TTI::TargetCostKind CostKind,
                                          const Instruction *I);
 
+  InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+                                         bool IsUnsigned,
+                                         TTI::TargetCostKind CostKind);
+
+  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
+                                             Optional<FastMathFlags> FMF,
+                                             TTI::TargetCostKind CostKind);
+
   bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
     if (!ST->hasVInstructions())
       return false;

diff  --git a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
new file mode 100644
index 0000000000000..dd4001166c138
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
@@ -0,0 +1,172 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV32
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV64
+
+define i32 @reduce_i8(i32 %arg) {
+; RISCV32-LABEL: 'reduce_i8'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_i8'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
+  %V2   = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
+  %V4   = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
+  %V8   = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
+  %V16  = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
+  %V32  = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
+  %V64  = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
+  %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
+  ret i32 undef
+}
+
+define i32 @reduce_i16(i32 %arg) {
+; RISCV32-LABEL: 'reduce_i16'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_i16'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
+  %V2   = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
+  %V4   = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
+  %V8   = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
+  %V16  = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
+  %V32  = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
+  %V64  = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
+  %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
+  ret i32 undef
+}
+
+define i32 @reduce_i32(i32 %arg) {
+; RISCV32-LABEL: 'reduce_i32'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_i32'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
+  %V2   = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
+  %V4   = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
+  %V8   = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
+  %V16  = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
+  %V32  = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
+  %V64  = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
+  %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
+  ret i32 undef
+}
+
+define i32 @reduce_i64(i32 %arg) {
+; RISCV32-LABEL: 'reduce_i64'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_i64'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
+  %V2   = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+  %V4   = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+  %V8   = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
+  %V16  = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
+  %V32  = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
+  %V64  = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
+  %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
+  ret i32 undef
+}
+
+declare i8 @llvm.vector.reduce.add.v1i8(<1 x i8>)
+declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.add.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.add.v128i8(<128 x i8>)
+declare i16 @llvm.vector.reduce.add.v1i16(<1 x i16>)
+declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.add.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.add.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.add.v128i16(<128 x i16>)
+declare i32 @llvm.vector.reduce.add.v1i32(<1 x i32>)
+declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.add.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.add.v64i32(<64 x i32>)
+declare i32 @llvm.vector.reduce.add.v128i32(<128 x i32>)
+declare i64 @llvm.vector.reduce.add.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.add.v32i64(<32 x i64>)
+declare i64 @llvm.vector.reduce.add.v64i64(<64 x i64>)
+declare i64 @llvm.vector.reduce.add.v128i64(<128 x i64>)

diff  --git a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
index 27c66ced9f86e..0dac7459e9907 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV32
-; RUN: opt < %s -mtriple=riscv64 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV64
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV32
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV64
 
 define i32 @reduce_i1(i32 %arg) {
 ; RISCV32-LABEL: 'reduce_i1'
@@ -36,6 +36,142 @@ define i32 @reduce_i1(i32 %arg) {
   ret i32 undef
 }
 
+define i32 @reduce_i8(i32 %arg) {
+; RISCV32-LABEL: 'reduce_i8'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_i8'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef)
+  %V2   = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> undef)
+  %V4   = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef)
+  %V8   = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef)
+  %V16  = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef)
+  %V32  = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
+  %V64  = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
+  %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef)
+  ret i32 undef
+}
+
+define i32 @reduce_i16(i32 %arg) {
+; RISCV32-LABEL: 'reduce_i16'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.and.v1i16(<1 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_i16'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.and.v1i16(<1 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i16 @llvm.vector.reduce.and.v1i16(<1 x i16> undef)
+  %V2   = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> undef)
+  %V4   = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef)
+  %V8   = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef)
+  %V16  = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef)
+  %V32  = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef)
+  %V64  = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef)
+  %V128 = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> undef)
+  ret i32 undef
+}
+
+define i32 @reduce_i32(i32 %arg) {
+; RISCV32-LABEL: 'reduce_i32'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.and.v1i32(<1 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_i32'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.and.v1i32(<1 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i32 @llvm.vector.reduce.and.v1i32(<1 x i32> undef)
+  %V2   = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef)
+  %V4   = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef)
+  %V8   = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef)
+  %V16  = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> undef)
+  %V32  = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef)
+  %V64  = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef)
+  %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef)
+  ret i32 undef
+}
+
+define i32 @reduce_i64(i32 %arg) {
+; RISCV32-LABEL: 'reduce_i64'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_i64'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
+  %V2   = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
+  %V4   = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
+  %V8   = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> undef)
+  %V16  = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> undef)
+  %V32  = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef)
+  %V64  = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef)
+  %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef)
+  ret i32 undef
+}
+
 declare i1 @llvm.vector.reduce.and.v1i1(<1 x i1>)
 declare i1 @llvm.vector.reduce.and.v2i1(<2 x i1>)
 declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>)
@@ -44,3 +180,35 @@ declare i1 @llvm.vector.reduce.and.v16i1(<16 x i1>)
 declare i1 @llvm.vector.reduce.and.v32i1(<32 x i1>)
 declare i1 @llvm.vector.reduce.and.v64i1(<64 x i1>)
 declare i1 @llvm.vector.reduce.and.v128i1(<128 x i1>)
+declare i8 @llvm.vector.reduce.and.v1i8(<1 x i8>)
+declare i8 @llvm.vector.reduce.and.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.and.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.and.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.and.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.and.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.and.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.and.v128i8(<128 x i8>)
+declare i16 @llvm.vector.reduce.and.v1i16(<1 x i16>)
+declare i16 @llvm.vector.reduce.and.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.and.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.and.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.and.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.and.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.and.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.and.v128i16(<128 x i16>)
+declare i32 @llvm.vector.reduce.and.v1i32(<1 x i32>)
+declare i32 @llvm.vector.reduce.and.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.and.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.and.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.and.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.and.v64i32(<64 x i32>)
+declare i32 @llvm.vector.reduce.and.v128i32(<128 x i32>)
+declare i64 @llvm.vector.reduce.and.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.and.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.and.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.and.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.and.v32i64(<32 x i64>)
+declare i64 @llvm.vector.reduce.and.v64i64(<64 x i64>)
+declare i64 @llvm.vector.reduce.and.v128i64(<128 x i64>)

diff  --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
new file mode 100644
index 0000000000000..535b957828bbc
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
@@ -0,0 +1,159 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=FP-REDUCE
+
+define void @reduce_fadd_half() {
+; FP-REDUCE-LABEL: 'reduce_fadd_half'
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:   %V1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction:   %V2 = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:   %V4 = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:   %V8 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction:   %V16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction:   %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:   %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction:   %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   ret void
+  %V1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef)
+  %V2 = call fast half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
+  %V4 = call fast half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
+  %V8 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef)
+  %V16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef)
+  %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0.0, <32 x half> undef)
+  %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0.0, <64 x half> undef)
+  %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0.0, <128 x half> undef)
+  ret void
+}
+
+define void @reduce_fadd_float() {
+; FP-REDUCE-LABEL: 'reduce_fadd_float'
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:   %V1 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction:   %V2 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:   %V4 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:   %V8 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction:   %V16 = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction:   %v32 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction:   %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:   %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   ret void
+  %V1 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef)
+  %V2 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef)
+  %V4 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
+  %V8 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
+  %V16 = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.0, <16 x float> undef)
+  %v32 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.0, <32 x float> undef)
+  %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.0, <64 x float> undef)
+  %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.0, <128 x float> undef)
+  ret void
+}
+
+define void @reduce_fadd_double() {
+; FP-REDUCE-LABEL: 'reduce_fadd_double'
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction:   %V1 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction:   %V2 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:   %V4 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:   %V8 = call fast double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction:   %V16 = call fast double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction:   %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction:   %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction:   %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   ret void
+  %V1 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef)
+  %V2 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
+  %V4 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)
+  %V8 = call fast double @llvm.vector.reduce.fadd.v8f64(double 0.0, <8 x double> undef)
+  %V16 = call fast double @llvm.vector.reduce.fadd.v16f64(double 0.0, <16 x double> undef)
+  %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.0, <32 x double> undef)
+  %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.0, <64 x double> undef)
+  %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.0, <128 x double> undef)
+  ret void
+}
+
+define void @reduce_oredered_fadd_half() {
+; FP-REDUCE-LABEL: 'reduce_oredered_fadd_half'
+; FP-REDUCE-LABEL: Cost Model: Found an estimated cost of 3 for instruction:   %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)
+; FP-REDUCE-LABEL: Cost Model: Found an estimated cost of 4 for instruction:   %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; FP-REDUCE-LABEL: Cost Model: Found an estimated cost of 6 for instruction:   %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; FP-REDUCE-LABEL: Cost Model: Found an estimated cost of 10 for instruction:   %V8 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; FP-REDUCE-LABEL: Cost Model: Found an estimated cost of 18 for instruction:   %V16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; FP-REDUCE-LABEL: Cost Model: Found an estimated cost of 34 for instruction:   %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
+; FP-REDUCE-LABEL: Cost Model: Found an estimated cost of 66 for instruction:   %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
+; FP-REDUCE-LABEL: Cost Model: Found an estimated cost of 130 for instruction:   %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
+; FP-REDUCE-LABEL: Cost Model: Found an estimated cost of 1 for instruction:   ret void
+  %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef)
+  %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
+  %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
+  %V8 = call half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef)
+  %V16 = call half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef)
+  %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0.0, <32 x half> undef)
+  %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0.0, <64 x half> undef)
+  %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0.0, <128 x half> undef)
+  ret void
+}
+
+define void @reduce_oredered_fadd_float() {
+; FP-REDUCE-LABEL: 'reduce_oredered_fadd_float'
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction:   %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:   %V2 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction:   %V4 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:   %V8 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction:   %V16 = call float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction:   %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction:   %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 131 for instruction:   %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   ret void
+  %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef)
+  %V2 = call float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef)
+  %V4 = call float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
+  %V8 = call float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
+  %V16 = call float @llvm.vector.reduce.fadd.v16f32(float 0.0, <16 x float> undef)
+  %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.0, <32 x float> undef)
+  %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.0, <64 x float> undef)
+  %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.0, <128 x float> undef)
+  ret void
+}
+
+define void @reduce_oredered_fadd_double() {
+; FP-REDUCE-LABEL: 'reduce_oredered_fadd_double'
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction:   %V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction:   %V2 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction:   %V4 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction:   %V8 = call double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction:   %V16 = call double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction:   %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 67 for instruction:   %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 133 for instruction:   %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   ret void
+  %V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef)
+  %V2 = call double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
+  %V4 = call double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)
+  %V8 = call double @llvm.vector.reduce.fadd.v8f64(double 0.0, <8 x double> undef)
+  %V16 = call double @llvm.vector.reduce.fadd.v16f64(double 0.0, <16 x double> undef)
+  %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.0, <32 x double> undef)
+  %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.0, <64 x double> undef)
+  %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.0, <128 x double> undef)
+  ret void
+}
+
+declare half @llvm.vector.reduce.fadd.v1f16(half, <1 x half>)
+declare half @llvm.vector.reduce.fadd.v2f16(half, <2 x half>)
+declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>)
+declare half @llvm.vector.reduce.fadd.v8f16(half, <8 x half>)
+declare half @llvm.vector.reduce.fadd.v16f16(half, <16 x half>)
+declare half @llvm.vector.reduce.fadd.v32f16(half, <32 x half>)
+declare half @llvm.vector.reduce.fadd.v64f16(half, <64 x half>)
+declare half @llvm.vector.reduce.fadd.v128f16(half, <128 x half>)
+declare float @llvm.vector.reduce.fadd.v1f32(float, <1 x float>)
+declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>)
+declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
+declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>)
+declare float @llvm.vector.reduce.fadd.v16f32(float, <16 x float>)
+declare float @llvm.vector.reduce.fadd.v32f32(float, <32 x float>)
+declare float @llvm.vector.reduce.fadd.v64f32(float, <64 x float>)
+declare float @llvm.vector.reduce.fadd.v128f32(float, <128 x float>)
+declare double @llvm.vector.reduce.fadd.v1f64(double, <1 x double>)
+declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)
+declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)
+declare double @llvm.vector.reduce.fadd.v8f64(double, <8 x double>)
+declare double @llvm.vector.reduce.fadd.v16f64(double, <16 x double>)
+declare double @llvm.vector.reduce.fadd.v32f64(double, <32 x double>)
+declare double @llvm.vector.reduce.fadd.v64f64(double, <64 x double>)
+declare double @llvm.vector.reduce.fadd.v128f64(double, <128 x double>)

diff  --git a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
new file mode 100644
index 0000000000000..f22452e0403a5
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
@@ -0,0 +1,340 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV32
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV64
+
+define i32 @reduce_umax_i8(i32 %arg) {
+; RISCV32-LABEL: 'reduce_umax_i8'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_umax_i8'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef)
+  %V2   = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
+  %V4   = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
+  %V8   = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
+  %V16  = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
+  %V32  = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
+  %V64  = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
+  %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
+  ret i32 undef
+}
+
+define i32 @reduce_umax_i16(i32 %arg) {
+; RISCV32-LABEL: 'reduce_umax_i16'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.umax.v1i16(<1 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_umax_i16'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.umax.v1i16(<1 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i16 @llvm.vector.reduce.umax.v1i16(<1 x i16> undef)
+  %V2   = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
+  %V4   = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
+  %V8   = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
+  %V16  = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
+  %V32  = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
+  %V64  = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
+  %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef)
+  ret i32 undef
+}
+
+define i32 @reduce_umax_i32(i32 %arg) {
+; RISCV32-LABEL: 'reduce_umax_i32'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_umax_i32'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> undef)
+  %V2   = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
+  %V4   = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
+  %V8   = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
+  %V16  = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
+  %V32  = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
+  %V64  = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef)
+  %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef)
+  ret i32 undef
+}
+
+define i32 @reduce_umax_i64(i32 %arg) {
+; RISCV32-LABEL: 'reduce_umax_i64'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_umax_i64'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
+  %V2   = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
+  %V4   = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
+  %V8   = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
+  %V16  = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
+  %V32  = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> undef)
+  %V64  = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef)
+  %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef)
+  ret i32 undef
+}
+
+define i32 @reduce_smax_i8(i32 %arg) {
+; RISCV32-LABEL: 'reduce_smax_i8'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_smax_i8'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef)
+  %V2   = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef)
+  %V4   = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
+  %V8   = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
+  %V16  = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
+  %V32  = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
+  %V64  = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
+  %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
+  ret i32 undef
+}
+
+define i32 @reduce_smax_i16(i32 %arg) {
+; RISCV32-LABEL: 'reduce_smax_i16'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.smax.v1i16(<1 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_smax_i16'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.smax.v1i16(<1 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i16 @llvm.vector.reduce.smax.v1i16(<1 x i16> undef)
+  %V2   = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
+  %V4   = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
+  %V8   = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
+  %V16  = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
+  %V32  = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
+  %V64  = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
+  %V128 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef)
+  ret i32 undef
+}
+
+define i32 @reduce_smax_i32(i32 %arg) {
+; RISCV32-LABEL: 'reduce_smax_i32'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.smax.v1i32(<1 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_smax_i32'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.smax.v1i32(<1 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i32 @llvm.vector.reduce.smax.v1i32(<1 x i32> undef)
+  %V2   = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
+  %V4   = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
+  %V8   = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
+  %V16  = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef)
+  %V32  = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
+  %V64  = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef)
+  %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef)
+  ret i32 undef
+}
+
+define i32 @reduce_smax_i64(i32 %arg) {
+; RISCV32-LABEL: 'reduce_smax_i64'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_smax_i64'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
+  %V2   = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
+  %V4   = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
+  %V8   = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
+  %V16  = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
+  %V32  = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef)
+  %V64  = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef)
+  %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef)
+  ret i32 undef
+}
+
+declare i8 @llvm.vector.reduce.umax.v1i8(<1 x i8>)
+declare i8 @llvm.vector.reduce.umax.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.umax.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.umax.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.umax.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.umax.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.umax.v128i8(<128 x i8>)
+declare i16 @llvm.vector.reduce.umax.v1i16(<1 x i16>)
+declare i16 @llvm.vector.reduce.umax.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.umax.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.umax.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.umax.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.umax.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.umax.v128i16(<128 x i16>)
+declare i32 @llvm.vector.reduce.umax.v1i32(<1 x i32>)
+declare i32 @llvm.vector.reduce.umax.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.umax.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.umax.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.umax.v64i32(<64 x i32>)
+declare i32 @llvm.vector.reduce.umax.v128i32(<128 x i32>)
+declare i64 @llvm.vector.reduce.umax.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.umax.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.umax.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.umax.v32i64(<32 x i64>)
+declare i64 @llvm.vector.reduce.umax.v64i64(<64 x i64>)
+declare i64 @llvm.vector.reduce.umax.v128i64(<128 x i64>)
+declare i8 @llvm.vector.reduce.smax.v1i8(<1 x i8>)
+declare i8 @llvm.vector.reduce.smax.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.smax.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.smax.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.smax.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.smax.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.smax.v128i8(<128 x i8>)
+declare i16 @llvm.vector.reduce.smax.v1i16(<1 x i16>)
+declare i16 @llvm.vector.reduce.smax.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.smax.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.smax.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.smax.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.smax.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.smax.v128i16(<128 x i16>)
+declare i32 @llvm.vector.reduce.smax.v1i32(<1 x i32>)
+declare i32 @llvm.vector.reduce.smax.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.smax.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.smax.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.smax.v64i32(<64 x i32>)
+declare i32 @llvm.vector.reduce.smax.v128i32(<128 x i32>)
+declare i64 @llvm.vector.reduce.smax.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.smax.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.smax.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.smax.v32i64(<32 x i64>)
+declare i64 @llvm.vector.reduce.smax.v64i64(<64 x i64>)
+declare i64 @llvm.vector.reduce.smax.v128i64(<128 x i64>)

diff  --git a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
new file mode 100644
index 0000000000000..daf10e5255e0d
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
@@ -0,0 +1,340 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV32
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV64
+
+define i32 @reduce_umin_i8(i32 %arg) {
+; RISCV32-LABEL: 'reduce_umin_i8'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_umin_i8'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef)
+  %V2   = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
+  %V4   = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
+  %V8   = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
+  %V16  = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
+  %V32  = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
+  %V64  = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
+  %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
+  ret i32 undef
+}
+
+define i32 @reduce_umin_i16(i32 %arg) {
+; RISCV32-LABEL: 'reduce_umin_i16'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.umin.v1i16(<1 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_umin_i16'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.umin.v1i16(<1 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i16 @llvm.vector.reduce.umin.v1i16(<1 x i16> undef)
+  %V2   = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
+  %V4   = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
+  %V8   = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
+  %V16  = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
+  %V32  = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
+  %V64  = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
+  %V128 = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> undef)
+  ret i32 undef
+}
+
+define i32 @reduce_umin_i32(i32 %arg) {
+; RISCV32-LABEL: 'reduce_umin_i32'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.umin.v1i32(<1 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_umin_i32'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.umin.v1i32(<1 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i32 @llvm.vector.reduce.umin.v1i32(<1 x i32> undef)
+  %V2   = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
+  %V4   = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
+  %V8   = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
+  %V16  = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
+  %V32  = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
+  %V64  = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> undef)
+  %V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef)
+  ret i32 undef
+}
+
+define i32 @reduce_umin_i64(i32 %arg) {
+; RISCV32-LABEL: 'reduce_umin_i64'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_umin_i64'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
+  %V2   = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
+  %V4   = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
+  %V8   = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
+  %V16  = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
+  %V32  = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> undef)
+  %V64  = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef)
+  %V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef)
+  ret i32 undef
+}
+
+define i32 @reduce_smin_i8(i32 %arg) {
+; RISCV32-LABEL: 'reduce_smin_i8'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_smin_i8'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef)
+  %V2   = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef)
+  %V4   = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
+  %V8   = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
+  %V16  = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
+  %V32  = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
+  %V64  = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
+  %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
+  ret i32 undef
+}
+
+define i32 @reduce_smin_i16(i32 %arg) {
+; RISCV32-LABEL: 'reduce_smin_i16'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.smin.v1i16(<1 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_smin_i16'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.smin.v1i16(<1 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i16 @llvm.vector.reduce.smin.v1i16(<1 x i16> undef)
+  %V2   = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
+  %V4   = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
+  %V8   = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
+  %V16  = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
+  %V32  = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
+  %V64  = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
+  %V128 = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> undef)
+  ret i32 undef
+}
+
+define i32 @reduce_smin_i32(i32 %arg) {
+; RISCV32-LABEL: 'reduce_smin_i32'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.smin.v1i32(<1 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_smin_i32'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.smin.v1i32(<1 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i32 @llvm.vector.reduce.smin.v1i32(<1 x i32> undef)
+  %V2   = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
+  %V4   = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
+  %V8   = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
+  %V16  = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef)
+  %V32  = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
+  %V64  = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> undef)
+  %V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef)
+  ret i32 undef
+}
+
+define i32 @reduce_smin_i64(i32 %arg) {
+; RISCV32-LABEL: 'reduce_smin_i64'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_smin_i64'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
+  %V2   = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
+  %V4   = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
+  %V8   = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
+  %V16  = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
+  %V32  = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> undef)
+  %V64  = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef)
+  %V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef)
+  ret i32 undef
+}
+
+declare i8 @llvm.vector.reduce.umin.v1i8(<1 x i8>)
+declare i8 @llvm.vector.reduce.umin.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.umin.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.umin.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.umin.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.umin.v128i8(<128 x i8>)
+declare i16 @llvm.vector.reduce.umin.v1i16(<1 x i16>)
+declare i16 @llvm.vector.reduce.umin.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.umin.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.umin.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.umin.v128i16(<128 x i16>)
+declare i32 @llvm.vector.reduce.umin.v1i32(<1 x i32>)
+declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.umin.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.umin.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.umin.v64i32(<64 x i32>)
+declare i32 @llvm.vector.reduce.umin.v128i32(<128 x i32>)
+declare i64 @llvm.vector.reduce.umin.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.umin.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.umin.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.umin.v32i64(<32 x i64>)
+declare i64 @llvm.vector.reduce.umin.v64i64(<64 x i64>)
+declare i64 @llvm.vector.reduce.umin.v128i64(<128 x i64>)
+declare i8 @llvm.vector.reduce.smin.v1i8(<1 x i8>)
+declare i8 @llvm.vector.reduce.smin.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.smin.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.smin.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.smin.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.smin.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.smin.v128i8(<128 x i8>)
+declare i16 @llvm.vector.reduce.smin.v1i16(<1 x i16>)
+declare i16 @llvm.vector.reduce.smin.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.smin.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.smin.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.smin.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.smin.v128i16(<128 x i16>)
+declare i32 @llvm.vector.reduce.smin.v1i32(<1 x i32>)
+declare i32 @llvm.vector.reduce.smin.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.smin.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.smin.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.smin.v64i32(<64 x i32>)
+declare i32 @llvm.vector.reduce.smin.v128i32(<128 x i32>)
+declare i64 @llvm.vector.reduce.smin.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.smin.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.smin.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.smin.v32i64(<32 x i64>)
+declare i64 @llvm.vector.reduce.smin.v64i64(<64 x i64>)
+declare i64 @llvm.vector.reduce.smin.v128i64(<128 x i64>)

diff  --git a/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
index 16178c063fbcb..3ec4b366326f9 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV32
-; RUN: opt < %s -mtriple=riscv64 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV64
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV32
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV64
 
 define i32 @reduce_i1(i32 %arg) {
 ; RISCV32-LABEL: 'reduce_i1'
@@ -36,6 +36,142 @@ define i32 @reduce_i1(i32 %arg) {
   ret i32 undef
 }
 
+define i32 @reduce_i8(i32 %arg) {
+; RISCV32-LABEL: 'reduce_i8'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_i8'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> undef)
+  %V2   = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> undef)
+  %V4   = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef)
+  %V8   = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef)
+  %V16  = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef)
+  %V32  = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
+  %V64  = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef)
+  %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef)
+  ret i32 undef
+}
+
+define i32 @reduce_i16(i32 %arg) {
+; RISCV32-LABEL: 'reduce_i16'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.or.v1i16(<1 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_i16'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.or.v1i16(<1 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i16 @llvm.vector.reduce.or.v1i16(<1 x i16> undef)
+  %V2   = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> undef)
+  %V4   = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef)
+  %V8   = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef)
+  %V16  = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef)
+  %V32  = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef)
+  %V64  = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef)
+  %V128 = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> undef)
+  ret i32 undef
+}
+
+define i32 @reduce_i32(i32 %arg) {
+; RISCV32-LABEL: 'reduce_i32'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.or.v1i32(<1 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_i32'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.or.v1i32(<1 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i32 @llvm.vector.reduce.or.v1i32(<1 x i32> undef)
+  %V2   = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef)
+  %V4   = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef)
+  %V8   = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef)
+  %V16  = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> undef)
+  %V32  = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef)
+  %V64  = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef)
+  %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef)
+  ret i32 undef
+}
+
+define i32 @reduce_i64(i32 %arg) {
+; RISCV32-LABEL: 'reduce_i64'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_i64'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
+  %V2   = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
+  %V4   = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
+  %V8   = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> undef)
+  %V16  = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> undef)
+  %V32  = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef)
+  %V64  = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef)
+  %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef)
+  ret i32 undef
+}
+
 declare i1 @llvm.vector.reduce.or.v1i1(<1 x i1>)
 declare i1 @llvm.vector.reduce.or.v2i1(<2 x i1>)
 declare i1 @llvm.vector.reduce.or.v4i1(<4 x i1>)
@@ -44,3 +180,35 @@ declare i1 @llvm.vector.reduce.or.v16i1(<16 x i1>)
 declare i1 @llvm.vector.reduce.or.v32i1(<32 x i1>)
 declare i1 @llvm.vector.reduce.or.v64i1(<64 x i1>)
 declare i1 @llvm.vector.reduce.or.v128i1(<128 x i1>)
+declare i8 @llvm.vector.reduce.or.v1i8(<1 x i8>)
+declare i8 @llvm.vector.reduce.or.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.or.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.or.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.or.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.or.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.or.v128i8(<128 x i8>)
+declare i16 @llvm.vector.reduce.or.v1i16(<1 x i16>)
+declare i16 @llvm.vector.reduce.or.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.or.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.or.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.or.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.or.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.or.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.or.v128i16(<128 x i16>)
+declare i32 @llvm.vector.reduce.or.v1i32(<1 x i32>)
+declare i32 @llvm.vector.reduce.or.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.or.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.or.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.or.v64i32(<64 x i32>)
+declare i32 @llvm.vector.reduce.or.v128i32(<128 x i32>)
+declare i64 @llvm.vector.reduce.or.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.or.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.or.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.or.v32i64(<32 x i64>)
+declare i64 @llvm.vector.reduce.or.v64i64(<64 x i64>)
+declare i64 @llvm.vector.reduce.or.v128i64(<128 x i64>)

diff  --git a/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
new file mode 100644
index 0000000000000..b20bef29fd27e
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
@@ -0,0 +1,172 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV32
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RISCV64
+
+define i32 @reduce_i8(i32 %arg) {
+; RISCV32-LABEL: 'reduce_i8'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_i8'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> undef)
+  %V2   = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef)
+  %V4   = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
+  %V8   = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
+  %V16  = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
+  %V32  = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
+  %V64  = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
+  %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
+  ret i32 undef
+}
+
+define i32 @reduce_i16(i32 %arg) {
+; RISCV32-LABEL: 'reduce_i16'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.xor.v1i16(<1 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_i16'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.xor.v1i16(<1 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i16 @llvm.vector.reduce.xor.v1i16(<1 x i16> undef)
+  %V2   = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef)
+  %V4   = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
+  %V8   = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
+  %V16  = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
+  %V32  = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
+  %V64  = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
+  %V128 = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> undef)
+  ret i32 undef
+}
+
+define i32 @reduce_i32(i32 %arg) {
+; RISCV32-LABEL: 'reduce_i32'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.xor.v1i32(<1 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_i32'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.xor.v1i32(<1 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i32 @llvm.vector.reduce.xor.v1i32(<1 x i32> undef)
+  %V2   = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
+  %V4   = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
+  %V8   = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
+  %V16  = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> undef)
+  %V32  = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
+  %V64  = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> undef)
+  %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef)
+  ret i32 undef
+}
+
+define i32 @reduce_i64(i32 %arg) {
+; RISCV32-LABEL: 'reduce_i64'
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef)
+; RISCV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; RISCV64-LABEL: 'reduce_i64'
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef)
+; RISCV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %V1   = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
+  %V2   = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
+  %V4   = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
+  %V8   = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
+  %V16  = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
+  %V32  = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef)
+  %V64  = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef)
+  %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef)
+  ret i32 undef
+}
+
+declare i8 @llvm.vector.reduce.xor.v1i8(<1 x i8>)
+declare i8 @llvm.vector.reduce.xor.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.xor.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.xor.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.xor.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.xor.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.xor.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.xor.v128i8(<128 x i8>)
+declare i16 @llvm.vector.reduce.xor.v1i16(<1 x i16>)
+declare i16 @llvm.vector.reduce.xor.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.xor.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.xor.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.xor.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.xor.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.xor.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.xor.v128i16(<128 x i16>)
+declare i32 @llvm.vector.reduce.xor.v1i32(<1 x i32>)
+declare i32 @llvm.vector.reduce.xor.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.xor.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.xor.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.xor.v64i32(<64 x i32>)
+declare i32 @llvm.vector.reduce.xor.v128i32(<128 x i32>)
+declare i64 @llvm.vector.reduce.xor.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.xor.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.xor.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.xor.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.xor.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.xor.v32i64(<32 x i64>)
+declare i64 @llvm.vector.reduce.xor.v64i64(<64 x i64>)
+declare i64 @llvm.vector.reduce.xor.v128i64(<128 x i64>)