[llvm] e88da1d - [RISCV] Add support for integer fixed min/max
Fraser Cormack via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 12 01:25:53 PST 2021
Author: Fraser Cormack
Date: 2021-02-12T09:19:45Z
New Revision: e88da1d6770bf31b859013c338adf1b001ebed5b
URL: https://github.com/llvm/llvm-project/commit/e88da1d6770bf31b859013c338adf1b001ebed5b
DIFF: https://github.com/llvm/llvm-project/commit/e88da1d6770bf31b859013c338adf1b001ebed5b.diff
LOG: [RISCV] Add support for integer fixed min/max
This patch extends the initial fixed-length vector support to include the
integer operations smin, smax, umin, and umax.
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D96491
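For illustration only (this snippet is not part of the commit; the function
name smin_example is hypothetical), a minimal sketch of the kind of
fixed-length IR this enables: the icmp/select idiom below is recognized as
ISD::SMIN during SelectionDAG construction and, with this patch, is lowered
through the new SMIN_VL node to the RVV vmin.vv instruction, as the added
tests demonstrate.

define void @smin_example(<4 x i32>* %x, <4 x i32>* %y) {
  ; Load both operands, take the element-wise signed minimum, store back to %x.
  %a = load <4 x i32>, <4 x i32>* %x
  %b = load <4 x i32>, <4 x i32>* %y
  %cc = icmp slt <4 x i32> %a, %b
  %c = select <4 x i1> %cc, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %c, <4 x i32>* %x
  ret void
}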
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.h
llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 41d5bde7617b..24b3025fa792 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -547,6 +547,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
+
+ setOperationAction(ISD::SMIN, VT, Custom);
+ setOperationAction(ISD::SMAX, VT, Custom);
+ setOperationAction(ISD::UMIN, VT, Custom);
+ setOperationAction(ISD::UMAX, VT, Custom);
}
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
@@ -1201,6 +1206,14 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
case ISD::FMA:
return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
+ case ISD::SMIN:
+ return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
+ case ISD::SMAX:
+ return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL);
+ case ISD::UMIN:
+ return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL);
+ case ISD::UMAX:
+ return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL);
}
}
@@ -4697,6 +4710,10 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(FDIV_VL)
NODE_NAME_CASE(FNEG_VL)
NODE_NAME_CASE(FMA_VL)
+ NODE_NAME_CASE(SMIN_VL)
+ NODE_NAME_CASE(SMAX_VL)
+ NODE_NAME_CASE(UMIN_VL)
+ NODE_NAME_CASE(UMAX_VL)
NODE_NAME_CASE(VMCLR_VL)
NODE_NAME_CASE(VMSET_VL)
NODE_NAME_CASE(VRGATHER_VX_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index ef86ce2cf608..3d7830158f9c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -162,6 +162,10 @@ enum NodeType : unsigned {
FDIV_VL,
FNEG_VL,
FMA_VL,
+ SMIN_VL,
+ SMAX_VL,
+ UMIN_VL,
+ UMAX_VL,
// Set mask vector to all zeros or ones.
VMCLR_VL,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index c6ae211d8ca8..f1d6952a9d8b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -64,6 +64,10 @@ def riscv_urem_vl : SDNode<"RISCVISD::UREM_VL", SDT_RISCVIntBinOp_VL>;
def riscv_shl_vl : SDNode<"RISCVISD::SHL_VL", SDT_RISCVIntBinOp_VL>;
def riscv_sra_vl : SDNode<"RISCVISD::SRA_VL", SDT_RISCVIntBinOp_VL>;
def riscv_srl_vl : SDNode<"RISCVISD::SRL_VL", SDT_RISCVIntBinOp_VL>;
+def riscv_smin_vl : SDNode<"RISCVISD::SMIN_VL", SDT_RISCVIntBinOp_VL>;
+def riscv_smax_vl : SDNode<"RISCVISD::SMAX_VL", SDT_RISCVIntBinOp_VL>;
+def riscv_umin_vl : SDNode<"RISCVISD::UMIN_VL", SDT_RISCVIntBinOp_VL>;
+def riscv_umax_vl : SDNode<"RISCVISD::UMAX_VL", SDT_RISCVIntBinOp_VL>;
def riscv_fadd_vl : SDNode<"RISCVISD::FADD_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative]>;
def riscv_fsub_vl : SDNode<"RISCVISD::FSUB_VL", SDT_RISCVFPBinOp_VL>;
def riscv_fmul_vl : SDNode<"RISCVISD::FMUL_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative]>;
@@ -221,6 +225,12 @@ defm "" : VPatBinaryVL_VV_VX_VI<riscv_shl_vl, "PseudoVSLL", uimm5>;
defm "" : VPatBinaryVL_VV_VX_VI<riscv_srl_vl, "PseudoVSRL", uimm5>;
defm "" : VPatBinaryVL_VV_VX_VI<riscv_sra_vl, "PseudoVSRA", uimm5>;
+// 12.9. Vector Integer Min/Max Instructions
+defm "" : VPatBinaryVL_VV_VX<riscv_umin_vl, "PseudoVMINU">;
+defm "" : VPatBinaryVL_VV_VX<riscv_smin_vl, "PseudoVMIN">;
+defm "" : VPatBinaryVL_VV_VX<riscv_umax_vl, "PseudoVMAXU">;
+defm "" : VPatBinaryVL_VV_VX<riscv_smax_vl, "PseudoVMAX">;
+
// 12.10. Vector Single-Width Integer Multiply Instructions
defm "" : VPatBinaryVL_VV_VX<riscv_mul_vl, "PseudoVMUL">;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index e5920d64af1d..6ebaa475c43b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -888,6 +888,294 @@ define void @urem_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
ret void
}
+define void @smin_v16i8(<16 x i8>* %x, <16 x i8>* %y) {
+; CHECK-LABEL: smin_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 16
+; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vle8.v v26, (a1)
+; CHECK-NEXT: vmin.vv v25, v25, v26
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = load <16 x i8>, <16 x i8>* %y
+ %cc = icmp slt <16 x i8> %a, %b
+ %c = select <16 x i1> %cc, <16 x i8> %a, <16 x i8> %b
+ store <16 x i8> %c, <16 x i8>* %x
+ ret void
+}
+
+define void @smin_v8i16(<8 x i16>* %x, <8 x i16>* %y) {
+; CHECK-LABEL: smin_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 8
+; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vle16.v v26, (a1)
+; CHECK-NEXT: vmin.vv v25, v25, v26
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = load <8 x i16>, <8 x i16>* %y
+ %cc = icmp slt <8 x i16> %a, %b
+ %c = select <8 x i1> %cc, <8 x i16> %a, <8 x i16> %b
+ store <8 x i16> %c, <8 x i16>* %x
+ ret void
+}
+
+define void @smin_v4i32(<4 x i32>* %x, <4 x i32>* %y) {
+; CHECK-LABEL: smin_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 4
+; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vle32.v v26, (a1)
+; CHECK-NEXT: vmin.vv v25, v25, v26
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = load <4 x i32>, <4 x i32>* %y
+ %cc = icmp slt <4 x i32> %a, %b
+ %c = select <4 x i1> %cc, <4 x i32> %a, <4 x i32> %b
+ store <4 x i32> %c, <4 x i32>* %x
+ ret void
+}
+
+define void @smin_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
+; CHECK-LABEL: smin_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 2
+; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu
+; CHECK-NEXT: vle64.v v25, (a0)
+; CHECK-NEXT: vle64.v v26, (a1)
+; CHECK-NEXT: vmin.vv v25, v25, v26
+; CHECK-NEXT: vse64.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i64>, <2 x i64>* %x
+ %b = load <2 x i64>, <2 x i64>* %y
+ %cc = icmp slt <2 x i64> %a, %b
+ %c = select <2 x i1> %cc, <2 x i64> %a, <2 x i64> %b
+ store <2 x i64> %c, <2 x i64>* %x
+ ret void
+}
+
+define void @smax_v16i8(<16 x i8>* %x, <16 x i8>* %y) {
+; CHECK-LABEL: smax_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 16
+; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vle8.v v26, (a1)
+; CHECK-NEXT: vmax.vv v25, v25, v26
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = load <16 x i8>, <16 x i8>* %y
+ %cc = icmp sgt <16 x i8> %a, %b
+ %c = select <16 x i1> %cc, <16 x i8> %a, <16 x i8> %b
+ store <16 x i8> %c, <16 x i8>* %x
+ ret void
+}
+
+define void @smax_v8i16(<8 x i16>* %x, <8 x i16>* %y) {
+; CHECK-LABEL: smax_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 8
+; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vle16.v v26, (a1)
+; CHECK-NEXT: vmax.vv v25, v25, v26
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = load <8 x i16>, <8 x i16>* %y
+ %cc = icmp sgt <8 x i16> %a, %b
+ %c = select <8 x i1> %cc, <8 x i16> %a, <8 x i16> %b
+ store <8 x i16> %c, <8 x i16>* %x
+ ret void
+}
+
+define void @smax_v4i32(<4 x i32>* %x, <4 x i32>* %y) {
+; CHECK-LABEL: smax_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 4
+; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vle32.v v26, (a1)
+; CHECK-NEXT: vmax.vv v25, v25, v26
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = load <4 x i32>, <4 x i32>* %y
+ %cc = icmp sgt <4 x i32> %a, %b
+ %c = select <4 x i1> %cc, <4 x i32> %a, <4 x i32> %b
+ store <4 x i32> %c, <4 x i32>* %x
+ ret void
+}
+
+define void @smax_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
+; CHECK-LABEL: smax_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 2
+; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu
+; CHECK-NEXT: vle64.v v25, (a0)
+; CHECK-NEXT: vle64.v v26, (a1)
+; CHECK-NEXT: vmax.vv v25, v25, v26
+; CHECK-NEXT: vse64.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i64>, <2 x i64>* %x
+ %b = load <2 x i64>, <2 x i64>* %y
+ %cc = icmp sgt <2 x i64> %a, %b
+ %c = select <2 x i1> %cc, <2 x i64> %a, <2 x i64> %b
+ store <2 x i64> %c, <2 x i64>* %x
+ ret void
+}
+
+define void @umin_v16i8(<16 x i8>* %x, <16 x i8>* %y) {
+; CHECK-LABEL: umin_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 16
+; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vle8.v v26, (a1)
+; CHECK-NEXT: vminu.vv v25, v25, v26
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = load <16 x i8>, <16 x i8>* %y
+ %cc = icmp ult <16 x i8> %a, %b
+ %c = select <16 x i1> %cc, <16 x i8> %a, <16 x i8> %b
+ store <16 x i8> %c, <16 x i8>* %x
+ ret void
+}
+
+define void @umin_v8i16(<8 x i16>* %x, <8 x i16>* %y) {
+; CHECK-LABEL: umin_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 8
+; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vle16.v v26, (a1)
+; CHECK-NEXT: vminu.vv v25, v25, v26
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = load <8 x i16>, <8 x i16>* %y
+ %cc = icmp ult <8 x i16> %a, %b
+ %c = select <8 x i1> %cc, <8 x i16> %a, <8 x i16> %b
+ store <8 x i16> %c, <8 x i16>* %x
+ ret void
+}
+
+define void @umin_v4i32(<4 x i32>* %x, <4 x i32>* %y) {
+; CHECK-LABEL: umin_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 4
+; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vle32.v v26, (a1)
+; CHECK-NEXT: vminu.vv v25, v25, v26
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = load <4 x i32>, <4 x i32>* %y
+ %cc = icmp ult <4 x i32> %a, %b
+ %c = select <4 x i1> %cc, <4 x i32> %a, <4 x i32> %b
+ store <4 x i32> %c, <4 x i32>* %x
+ ret void
+}
+
+define void @umin_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
+; CHECK-LABEL: umin_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 2
+; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu
+; CHECK-NEXT: vle64.v v25, (a0)
+; CHECK-NEXT: vle64.v v26, (a1)
+; CHECK-NEXT: vminu.vv v25, v25, v26
+; CHECK-NEXT: vse64.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i64>, <2 x i64>* %x
+ %b = load <2 x i64>, <2 x i64>* %y
+ %cc = icmp ult <2 x i64> %a, %b
+ %c = select <2 x i1> %cc, <2 x i64> %a, <2 x i64> %b
+ store <2 x i64> %c, <2 x i64>* %x
+ ret void
+}
+
+define void @umax_v16i8(<16 x i8>* %x, <16 x i8>* %y) {
+; CHECK-LABEL: umax_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 16
+; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vle8.v v26, (a1)
+; CHECK-NEXT: vmaxu.vv v25, v25, v26
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = load <16 x i8>, <16 x i8>* %y
+ %cc = icmp ugt <16 x i8> %a, %b
+ %c = select <16 x i1> %cc, <16 x i8> %a, <16 x i8> %b
+ store <16 x i8> %c, <16 x i8>* %x
+ ret void
+}
+
+define void @umax_v8i16(<8 x i16>* %x, <8 x i16>* %y) {
+; CHECK-LABEL: umax_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 8
+; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vle16.v v26, (a1)
+; CHECK-NEXT: vmaxu.vv v25, v25, v26
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = load <8 x i16>, <8 x i16>* %y
+ %cc = icmp ugt <8 x i16> %a, %b
+ %c = select <8 x i1> %cc, <8 x i16> %a, <8 x i16> %b
+ store <8 x i16> %c, <8 x i16>* %x
+ ret void
+}
+
+define void @umax_v4i32(<4 x i32>* %x, <4 x i32>* %y) {
+; CHECK-LABEL: umax_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 4
+; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vle32.v v26, (a1)
+; CHECK-NEXT: vmaxu.vv v25, v25, v26
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = load <4 x i32>, <4 x i32>* %y
+ %cc = icmp ugt <4 x i32> %a, %b
+ %c = select <4 x i1> %cc, <4 x i32> %a, <4 x i32> %b
+ store <4 x i32> %c, <4 x i32>* %x
+ ret void
+}
+
+define void @umax_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
+; CHECK-LABEL: umax_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 2
+; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu
+; CHECK-NEXT: vle64.v v25, (a0)
+; CHECK-NEXT: vle64.v v26, (a1)
+; CHECK-NEXT: vmaxu.vv v25, v25, v26
+; CHECK-NEXT: vse64.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i64>, <2 x i64>* %x
+ %b = load <2 x i64>, <2 x i64>* %y
+ %cc = icmp ugt <2 x i64> %a, %b
+ %c = select <2 x i1> %cc, <2 x i64> %a, <2 x i64> %b
+ store <2 x i64> %c, <2 x i64>* %x
+ ret void
+}
+
define void @add_v32i8(<32 x i8>* %x, <32 x i8>* %y) {
; LMULMAX2-LABEL: add_v32i8:
; LMULMAX2: # %bb.0:
@@ -3489,3 +3777,803 @@ define void @extract_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
store <4 x i64> %c, <4 x i64>* %x
ret void
}
+
+define void @smin_v32i8(<32 x i8>* %x, <32 x i8>* %y) {
+; LMULMAX2-LABEL: smin_v32i8:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a2, zero, 32
+; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT: vle8.v v26, (a0)
+; LMULMAX2-NEXT: vle8.v v28, (a1)
+; LMULMAX2-NEXT: vmin.vv v26, v26, v28
+; LMULMAX2-NEXT: vse8.v v26, (a0)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-RV32-LABEL: smin_v32i8:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a2, zero, 16
+; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle8.v v25, (a0)
+; LMULMAX1-RV32-NEXT: addi a2, a0, 16
+; LMULMAX1-RV32-NEXT: vle8.v v26, (a2)
+; LMULMAX1-RV32-NEXT: addi a3, a1, 16
+; LMULMAX1-RV32-NEXT: vle8.v v27, (a3)
+; LMULMAX1-RV32-NEXT: vle8.v v28, (a1)
+; LMULMAX1-RV32-NEXT: vmin.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT: vmin.vv v25, v25, v28
+; LMULMAX1-RV32-NEXT: vse8.v v25, (a0)
+; LMULMAX1-RV32-NEXT: vse8.v v26, (a2)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: smin_v32i8:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a2, zero, 16
+; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vle8.v v25, (a0)
+; LMULMAX1-RV64-NEXT: addi a2, a1, 16
+; LMULMAX1-RV64-NEXT: vle8.v v26, (a2)
+; LMULMAX1-RV64-NEXT: addi a2, a0, 16
+; LMULMAX1-RV64-NEXT: vle8.v v27, (a2)
+; LMULMAX1-RV64-NEXT: vle8.v v28, (a1)
+; LMULMAX1-RV64-NEXT: vmin.vv v26, v27, v26
+; LMULMAX1-RV64-NEXT: vmin.vv v25, v25, v28
+; LMULMAX1-RV64-NEXT: vse8.v v25, (a0)
+; LMULMAX1-RV64-NEXT: vse8.v v26, (a2)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <32 x i8>, <32 x i8>* %x
+ %b = load <32 x i8>, <32 x i8>* %y
+ %cc = icmp slt <32 x i8> %a, %b
+ %c = select <32 x i1> %cc, <32 x i8> %a, <32 x i8> %b
+ store <32 x i8> %c, <32 x i8>* %x
+ ret void
+}
+
+define void @smin_v16i16(<16 x i16>* %x, <16 x i16>* %y) {
+; LMULMAX2-LABEL: smin_v16i16:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a2, zero, 16
+; LMULMAX2-NEXT: vsetvli a2, a2, e16,m2,ta,mu
+; LMULMAX2-NEXT: vle16.v v26, (a0)
+; LMULMAX2-NEXT: vle16.v v28, (a1)
+; LMULMAX2-NEXT: vmin.vv v26, v26, v28
+; LMULMAX2-NEXT: vse16.v v26, (a0)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-RV32-LABEL: smin_v16i16:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a2, zero, 8
+; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle16.v v25, (a0)
+; LMULMAX1-RV32-NEXT: addi a2, a0, 16
+; LMULMAX1-RV32-NEXT: vle16.v v26, (a2)
+; LMULMAX1-RV32-NEXT: addi a3, a1, 16
+; LMULMAX1-RV32-NEXT: vle16.v v27, (a3)
+; LMULMAX1-RV32-NEXT: vle16.v v28, (a1)
+; LMULMAX1-RV32-NEXT: vmin.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT: vmin.vv v25, v25, v28
+; LMULMAX1-RV32-NEXT: vse16.v v25, (a0)
+; LMULMAX1-RV32-NEXT: vse16.v v26, (a2)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: smin_v16i16:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a2, zero, 8
+; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vle16.v v25, (a0)
+; LMULMAX1-RV64-NEXT: addi a2, a1, 16
+; LMULMAX1-RV64-NEXT: vle16.v v26, (a2)
+; LMULMAX1-RV64-NEXT: addi a2, a0, 16
+; LMULMAX1-RV64-NEXT: vle16.v v27, (a2)
+; LMULMAX1-RV64-NEXT: vle16.v v28, (a1)
+; LMULMAX1-RV64-NEXT: vmin.vv v26, v27, v26
+; LMULMAX1-RV64-NEXT: vmin.vv v25, v25, v28
+; LMULMAX1-RV64-NEXT: vse16.v v25, (a0)
+; LMULMAX1-RV64-NEXT: vse16.v v26, (a2)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <16 x i16>, <16 x i16>* %x
+ %b = load <16 x i16>, <16 x i16>* %y
+ %cc = icmp slt <16 x i16> %a, %b
+ %c = select <16 x i1> %cc, <16 x i16> %a, <16 x i16> %b
+ store <16 x i16> %c, <16 x i16>* %x
+ ret void
+}
+
+define void @smin_v8i32(<8 x i32>* %x, <8 x i32>* %y) {
+; LMULMAX2-LABEL: smin_v8i32:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a2, zero, 8
+; LMULMAX2-NEXT: vsetvli a2, a2, e32,m2,ta,mu
+; LMULMAX2-NEXT: vle32.v v26, (a0)
+; LMULMAX2-NEXT: vle32.v v28, (a1)
+; LMULMAX2-NEXT: vmin.vv v26, v26, v28
+; LMULMAX2-NEXT: vse32.v v26, (a0)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-RV32-LABEL: smin_v8i32:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a2, zero, 4
+; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle32.v v25, (a0)
+; LMULMAX1-RV32-NEXT: addi a2, a0, 16
+; LMULMAX1-RV32-NEXT: vle32.v v26, (a2)
+; LMULMAX1-RV32-NEXT: addi a3, a1, 16
+; LMULMAX1-RV32-NEXT: vle32.v v27, (a3)
+; LMULMAX1-RV32-NEXT: vle32.v v28, (a1)
+; LMULMAX1-RV32-NEXT: vmin.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT: vmin.vv v25, v25, v28
+; LMULMAX1-RV32-NEXT: vse32.v v25, (a0)
+; LMULMAX1-RV32-NEXT: vse32.v v26, (a2)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: smin_v8i32:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a2, zero, 4
+; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vle32.v v25, (a0)
+; LMULMAX1-RV64-NEXT: addi a2, a1, 16
+; LMULMAX1-RV64-NEXT: vle32.v v26, (a2)
+; LMULMAX1-RV64-NEXT: addi a2, a0, 16
+; LMULMAX1-RV64-NEXT: vle32.v v27, (a2)
+; LMULMAX1-RV64-NEXT: vle32.v v28, (a1)
+; LMULMAX1-RV64-NEXT: vmin.vv v26, v27, v26
+; LMULMAX1-RV64-NEXT: vmin.vv v25, v25, v28
+; LMULMAX1-RV64-NEXT: vse32.v v25, (a0)
+; LMULMAX1-RV64-NEXT: vse32.v v26, (a2)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <8 x i32>, <8 x i32>* %x
+ %b = load <8 x i32>, <8 x i32>* %y
+ %cc = icmp slt <8 x i32> %a, %b
+ %c = select <8 x i1> %cc, <8 x i32> %a, <8 x i32> %b
+ store <8 x i32> %c, <8 x i32>* %x
+ ret void
+}
+
+define void @smin_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
+; LMULMAX2-LABEL: smin_v4i64:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a2, zero, 4
+; LMULMAX2-NEXT: vsetvli a2, a2, e64,m2,ta,mu
+; LMULMAX2-NEXT: vle64.v v26, (a0)
+; LMULMAX2-NEXT: vle64.v v28, (a1)
+; LMULMAX2-NEXT: vmin.vv v26, v26, v28
+; LMULMAX2-NEXT: vse64.v v26, (a0)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-RV32-LABEL: smin_v4i64:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a2, zero, 2
+; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
+; LMULMAX1-RV32-NEXT: addi a2, a0, 16
+; LMULMAX1-RV32-NEXT: vle64.v v26, (a2)
+; LMULMAX1-RV32-NEXT: addi a3, a1, 16
+; LMULMAX1-RV32-NEXT: vle64.v v27, (a3)
+; LMULMAX1-RV32-NEXT: vle64.v v28, (a1)
+; LMULMAX1-RV32-NEXT: vmin.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT: vmin.vv v25, v25, v28
+; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
+; LMULMAX1-RV32-NEXT: vse64.v v26, (a2)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: smin_v4i64:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a2, zero, 2
+; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e64,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
+; LMULMAX1-RV64-NEXT: addi a2, a1, 16
+; LMULMAX1-RV64-NEXT: vle64.v v26, (a2)
+; LMULMAX1-RV64-NEXT: addi a2, a0, 16
+; LMULMAX1-RV64-NEXT: vle64.v v27, (a2)
+; LMULMAX1-RV64-NEXT: vle64.v v28, (a1)
+; LMULMAX1-RV64-NEXT: vmin.vv v26, v27, v26
+; LMULMAX1-RV64-NEXT: vmin.vv v25, v25, v28
+; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
+; LMULMAX1-RV64-NEXT: vse64.v v26, (a2)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <4 x i64>, <4 x i64>* %x
+ %b = load <4 x i64>, <4 x i64>* %y
+ %cc = icmp slt <4 x i64> %a, %b
+ %c = select <4 x i1> %cc, <4 x i64> %a, <4 x i64> %b
+ store <4 x i64> %c, <4 x i64>* %x
+ ret void
+}
+
+define void @smax_v32i8(<32 x i8>* %x, <32 x i8>* %y) {
+; LMULMAX2-LABEL: smax_v32i8:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a2, zero, 32
+; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT: vle8.v v26, (a0)
+; LMULMAX2-NEXT: vle8.v v28, (a1)
+; LMULMAX2-NEXT: vmax.vv v26, v26, v28
+; LMULMAX2-NEXT: vse8.v v26, (a0)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-RV32-LABEL: smax_v32i8:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a2, zero, 16
+; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle8.v v25, (a0)
+; LMULMAX1-RV32-NEXT: addi a2, a0, 16
+; LMULMAX1-RV32-NEXT: vle8.v v26, (a2)
+; LMULMAX1-RV32-NEXT: addi a3, a1, 16
+; LMULMAX1-RV32-NEXT: vle8.v v27, (a3)
+; LMULMAX1-RV32-NEXT: vle8.v v28, (a1)
+; LMULMAX1-RV32-NEXT: vmax.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT: vmax.vv v25, v25, v28
+; LMULMAX1-RV32-NEXT: vse8.v v25, (a0)
+; LMULMAX1-RV32-NEXT: vse8.v v26, (a2)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: smax_v32i8:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a2, zero, 16
+; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vle8.v v25, (a0)
+; LMULMAX1-RV64-NEXT: addi a2, a1, 16
+; LMULMAX1-RV64-NEXT: vle8.v v26, (a2)
+; LMULMAX1-RV64-NEXT: addi a2, a0, 16
+; LMULMAX1-RV64-NEXT: vle8.v v27, (a2)
+; LMULMAX1-RV64-NEXT: vle8.v v28, (a1)
+; LMULMAX1-RV64-NEXT: vmax.vv v26, v27, v26
+; LMULMAX1-RV64-NEXT: vmax.vv v25, v25, v28
+; LMULMAX1-RV64-NEXT: vse8.v v25, (a0)
+; LMULMAX1-RV64-NEXT: vse8.v v26, (a2)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <32 x i8>, <32 x i8>* %x
+ %b = load <32 x i8>, <32 x i8>* %y
+ %cc = icmp sgt <32 x i8> %a, %b
+ %c = select <32 x i1> %cc, <32 x i8> %a, <32 x i8> %b
+ store <32 x i8> %c, <32 x i8>* %x
+ ret void
+}
+
+define void @smax_v16i16(<16 x i16>* %x, <16 x i16>* %y) {
+; LMULMAX2-LABEL: smax_v16i16:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a2, zero, 16
+; LMULMAX2-NEXT: vsetvli a2, a2, e16,m2,ta,mu
+; LMULMAX2-NEXT: vle16.v v26, (a0)
+; LMULMAX2-NEXT: vle16.v v28, (a1)
+; LMULMAX2-NEXT: vmax.vv v26, v26, v28
+; LMULMAX2-NEXT: vse16.v v26, (a0)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-RV32-LABEL: smax_v16i16:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a2, zero, 8
+; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle16.v v25, (a0)
+; LMULMAX1-RV32-NEXT: addi a2, a0, 16
+; LMULMAX1-RV32-NEXT: vle16.v v26, (a2)
+; LMULMAX1-RV32-NEXT: addi a3, a1, 16
+; LMULMAX1-RV32-NEXT: vle16.v v27, (a3)
+; LMULMAX1-RV32-NEXT: vle16.v v28, (a1)
+; LMULMAX1-RV32-NEXT: vmax.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT: vmax.vv v25, v25, v28
+; LMULMAX1-RV32-NEXT: vse16.v v25, (a0)
+; LMULMAX1-RV32-NEXT: vse16.v v26, (a2)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: smax_v16i16:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a2, zero, 8
+; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vle16.v v25, (a0)
+; LMULMAX1-RV64-NEXT: addi a2, a1, 16
+; LMULMAX1-RV64-NEXT: vle16.v v26, (a2)
+; LMULMAX1-RV64-NEXT: addi a2, a0, 16
+; LMULMAX1-RV64-NEXT: vle16.v v27, (a2)
+; LMULMAX1-RV64-NEXT: vle16.v v28, (a1)
+; LMULMAX1-RV64-NEXT: vmax.vv v26, v27, v26
+; LMULMAX1-RV64-NEXT: vmax.vv v25, v25, v28
+; LMULMAX1-RV64-NEXT: vse16.v v25, (a0)
+; LMULMAX1-RV64-NEXT: vse16.v v26, (a2)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <16 x i16>, <16 x i16>* %x
+ %b = load <16 x i16>, <16 x i16>* %y
+ %cc = icmp sgt <16 x i16> %a, %b
+ %c = select <16 x i1> %cc, <16 x i16> %a, <16 x i16> %b
+ store <16 x i16> %c, <16 x i16>* %x
+ ret void
+}
+
+define void @smax_v8i32(<8 x i32>* %x, <8 x i32>* %y) {
+; LMULMAX2-LABEL: smax_v8i32:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a2, zero, 8
+; LMULMAX2-NEXT: vsetvli a2, a2, e32,m2,ta,mu
+; LMULMAX2-NEXT: vle32.v v26, (a0)
+; LMULMAX2-NEXT: vle32.v v28, (a1)
+; LMULMAX2-NEXT: vmax.vv v26, v26, v28
+; LMULMAX2-NEXT: vse32.v v26, (a0)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-RV32-LABEL: smax_v8i32:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a2, zero, 4
+; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle32.v v25, (a0)
+; LMULMAX1-RV32-NEXT: addi a2, a0, 16
+; LMULMAX1-RV32-NEXT: vle32.v v26, (a2)
+; LMULMAX1-RV32-NEXT: addi a3, a1, 16
+; LMULMAX1-RV32-NEXT: vle32.v v27, (a3)
+; LMULMAX1-RV32-NEXT: vle32.v v28, (a1)
+; LMULMAX1-RV32-NEXT: vmax.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT: vmax.vv v25, v25, v28
+; LMULMAX1-RV32-NEXT: vse32.v v25, (a0)
+; LMULMAX1-RV32-NEXT: vse32.v v26, (a2)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: smax_v8i32:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a2, zero, 4
+; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vle32.v v25, (a0)
+; LMULMAX1-RV64-NEXT: addi a2, a1, 16
+; LMULMAX1-RV64-NEXT: vle32.v v26, (a2)
+; LMULMAX1-RV64-NEXT: addi a2, a0, 16
+; LMULMAX1-RV64-NEXT: vle32.v v27, (a2)
+; LMULMAX1-RV64-NEXT: vle32.v v28, (a1)
+; LMULMAX1-RV64-NEXT: vmax.vv v26, v27, v26
+; LMULMAX1-RV64-NEXT: vmax.vv v25, v25, v28
+; LMULMAX1-RV64-NEXT: vse32.v v25, (a0)
+; LMULMAX1-RV64-NEXT: vse32.v v26, (a2)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <8 x i32>, <8 x i32>* %x
+ %b = load <8 x i32>, <8 x i32>* %y
+ %cc = icmp sgt <8 x i32> %a, %b
+ %c = select <8 x i1> %cc, <8 x i32> %a, <8 x i32> %b
+ store <8 x i32> %c, <8 x i32>* %x
+ ret void
+}
+
+define void @smax_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
+; LMULMAX2-LABEL: smax_v4i64:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a2, zero, 4
+; LMULMAX2-NEXT: vsetvli a2, a2, e64,m2,ta,mu
+; LMULMAX2-NEXT: vle64.v v26, (a0)
+; LMULMAX2-NEXT: vle64.v v28, (a1)
+; LMULMAX2-NEXT: vmax.vv v26, v26, v28
+; LMULMAX2-NEXT: vse64.v v26, (a0)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-RV32-LABEL: smax_v4i64:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a2, zero, 2
+; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
+; LMULMAX1-RV32-NEXT: addi a2, a0, 16
+; LMULMAX1-RV32-NEXT: vle64.v v26, (a2)
+; LMULMAX1-RV32-NEXT: addi a3, a1, 16
+; LMULMAX1-RV32-NEXT: vle64.v v27, (a3)
+; LMULMAX1-RV32-NEXT: vle64.v v28, (a1)
+; LMULMAX1-RV32-NEXT: vmax.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT: vmax.vv v25, v25, v28
+; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
+; LMULMAX1-RV32-NEXT: vse64.v v26, (a2)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: smax_v4i64:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a2, zero, 2
+; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e64,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
+; LMULMAX1-RV64-NEXT: addi a2, a1, 16
+; LMULMAX1-RV64-NEXT: vle64.v v26, (a2)
+; LMULMAX1-RV64-NEXT: addi a2, a0, 16
+; LMULMAX1-RV64-NEXT: vle64.v v27, (a2)
+; LMULMAX1-RV64-NEXT: vle64.v v28, (a1)
+; LMULMAX1-RV64-NEXT: vmax.vv v26, v27, v26
+; LMULMAX1-RV64-NEXT: vmax.vv v25, v25, v28
+; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
+; LMULMAX1-RV64-NEXT: vse64.v v26, (a2)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <4 x i64>, <4 x i64>* %x
+ %b = load <4 x i64>, <4 x i64>* %y
+ %cc = icmp sgt <4 x i64> %a, %b
+ %c = select <4 x i1> %cc, <4 x i64> %a, <4 x i64> %b
+ store <4 x i64> %c, <4 x i64>* %x
+ ret void
+}
+
+define void @umin_v32i8(<32 x i8>* %x, <32 x i8>* %y) {
+; LMULMAX2-LABEL: umin_v32i8:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a2, zero, 32
+; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT: vle8.v v26, (a0)
+; LMULMAX2-NEXT: vle8.v v28, (a1)
+; LMULMAX2-NEXT: vminu.vv v26, v26, v28
+; LMULMAX2-NEXT: vse8.v v26, (a0)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-RV32-LABEL: umin_v32i8:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a2, zero, 16
+; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle8.v v25, (a0)
+; LMULMAX1-RV32-NEXT: addi a2, a0, 16
+; LMULMAX1-RV32-NEXT: vle8.v v26, (a2)
+; LMULMAX1-RV32-NEXT: addi a3, a1, 16
+; LMULMAX1-RV32-NEXT: vle8.v v27, (a3)
+; LMULMAX1-RV32-NEXT: vle8.v v28, (a1)
+; LMULMAX1-RV32-NEXT: vminu.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT: vminu.vv v25, v25, v28
+; LMULMAX1-RV32-NEXT: vse8.v v25, (a0)
+; LMULMAX1-RV32-NEXT: vse8.v v26, (a2)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: umin_v32i8:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a2, zero, 16
+; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vle8.v v25, (a0)
+; LMULMAX1-RV64-NEXT: addi a2, a1, 16
+; LMULMAX1-RV64-NEXT: vle8.v v26, (a2)
+; LMULMAX1-RV64-NEXT: addi a2, a0, 16
+; LMULMAX1-RV64-NEXT: vle8.v v27, (a2)
+; LMULMAX1-RV64-NEXT: vle8.v v28, (a1)
+; LMULMAX1-RV64-NEXT: vminu.vv v26, v27, v26
+; LMULMAX1-RV64-NEXT: vminu.vv v25, v25, v28
+; LMULMAX1-RV64-NEXT: vse8.v v25, (a0)
+; LMULMAX1-RV64-NEXT: vse8.v v26, (a2)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <32 x i8>, <32 x i8>* %x
+ %b = load <32 x i8>, <32 x i8>* %y
+ %cc = icmp ult <32 x i8> %a, %b
+ %c = select <32 x i1> %cc, <32 x i8> %a, <32 x i8> %b
+ store <32 x i8> %c, <32 x i8>* %x
+ ret void
+}
+
+define void @umin_v16i16(<16 x i16>* %x, <16 x i16>* %y) {
+; LMULMAX2-LABEL: umin_v16i16:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a2, zero, 16
+; LMULMAX2-NEXT: vsetvli a2, a2, e16,m2,ta,mu
+; LMULMAX2-NEXT: vle16.v v26, (a0)
+; LMULMAX2-NEXT: vle16.v v28, (a1)
+; LMULMAX2-NEXT: vminu.vv v26, v26, v28
+; LMULMAX2-NEXT: vse16.v v26, (a0)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-RV32-LABEL: umin_v16i16:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a2, zero, 8
+; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle16.v v25, (a0)
+; LMULMAX1-RV32-NEXT: addi a2, a0, 16
+; LMULMAX1-RV32-NEXT: vle16.v v26, (a2)
+; LMULMAX1-RV32-NEXT: addi a3, a1, 16
+; LMULMAX1-RV32-NEXT: vle16.v v27, (a3)
+; LMULMAX1-RV32-NEXT: vle16.v v28, (a1)
+; LMULMAX1-RV32-NEXT: vminu.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT: vminu.vv v25, v25, v28
+; LMULMAX1-RV32-NEXT: vse16.v v25, (a0)
+; LMULMAX1-RV32-NEXT: vse16.v v26, (a2)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: umin_v16i16:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a2, zero, 8
+; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vle16.v v25, (a0)
+; LMULMAX1-RV64-NEXT: addi a2, a1, 16
+; LMULMAX1-RV64-NEXT: vle16.v v26, (a2)
+; LMULMAX1-RV64-NEXT: addi a2, a0, 16
+; LMULMAX1-RV64-NEXT: vle16.v v27, (a2)
+; LMULMAX1-RV64-NEXT: vle16.v v28, (a1)
+; LMULMAX1-RV64-NEXT: vminu.vv v26, v27, v26
+; LMULMAX1-RV64-NEXT: vminu.vv v25, v25, v28
+; LMULMAX1-RV64-NEXT: vse16.v v25, (a0)
+; LMULMAX1-RV64-NEXT: vse16.v v26, (a2)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <16 x i16>, <16 x i16>* %x
+ %b = load <16 x i16>, <16 x i16>* %y
+ %cc = icmp ult <16 x i16> %a, %b
+ %c = select <16 x i1> %cc, <16 x i16> %a, <16 x i16> %b
+ store <16 x i16> %c, <16 x i16>* %x
+ ret void
+}
+
+define void @umin_v8i32(<8 x i32>* %x, <8 x i32>* %y) {
+; LMULMAX2-LABEL: umin_v8i32:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a2, zero, 8
+; LMULMAX2-NEXT: vsetvli a2, a2, e32,m2,ta,mu
+; LMULMAX2-NEXT: vle32.v v26, (a0)
+; LMULMAX2-NEXT: vle32.v v28, (a1)
+; LMULMAX2-NEXT: vminu.vv v26, v26, v28
+; LMULMAX2-NEXT: vse32.v v26, (a0)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-RV32-LABEL: umin_v8i32:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a2, zero, 4
+; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle32.v v25, (a0)
+; LMULMAX1-RV32-NEXT: addi a2, a0, 16
+; LMULMAX1-RV32-NEXT: vle32.v v26, (a2)
+; LMULMAX1-RV32-NEXT: addi a3, a1, 16
+; LMULMAX1-RV32-NEXT: vle32.v v27, (a3)
+; LMULMAX1-RV32-NEXT: vle32.v v28, (a1)
+; LMULMAX1-RV32-NEXT: vminu.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT: vminu.vv v25, v25, v28
+; LMULMAX1-RV32-NEXT: vse32.v v25, (a0)
+; LMULMAX1-RV32-NEXT: vse32.v v26, (a2)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: umin_v8i32:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a2, zero, 4
+; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vle32.v v25, (a0)
+; LMULMAX1-RV64-NEXT: addi a2, a1, 16
+; LMULMAX1-RV64-NEXT: vle32.v v26, (a2)
+; LMULMAX1-RV64-NEXT: addi a2, a0, 16
+; LMULMAX1-RV64-NEXT: vle32.v v27, (a2)
+; LMULMAX1-RV64-NEXT: vle32.v v28, (a1)
+; LMULMAX1-RV64-NEXT: vminu.vv v26, v27, v26
+; LMULMAX1-RV64-NEXT: vminu.vv v25, v25, v28
+; LMULMAX1-RV64-NEXT: vse32.v v25, (a0)
+; LMULMAX1-RV64-NEXT: vse32.v v26, (a2)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <8 x i32>, <8 x i32>* %x
+ %b = load <8 x i32>, <8 x i32>* %y
+ %cc = icmp ult <8 x i32> %a, %b
+ %c = select <8 x i1> %cc, <8 x i32> %a, <8 x i32> %b
+ store <8 x i32> %c, <8 x i32>* %x
+ ret void
+}
+
+define void @umin_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
+; LMULMAX2-LABEL: umin_v4i64:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a2, zero, 4
+; LMULMAX2-NEXT: vsetvli a2, a2, e64,m2,ta,mu
+; LMULMAX2-NEXT: vle64.v v26, (a0)
+; LMULMAX2-NEXT: vle64.v v28, (a1)
+; LMULMAX2-NEXT: vminu.vv v26, v26, v28
+; LMULMAX2-NEXT: vse64.v v26, (a0)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-RV32-LABEL: umin_v4i64:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a2, zero, 2
+; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
+; LMULMAX1-RV32-NEXT: addi a2, a0, 16
+; LMULMAX1-RV32-NEXT: vle64.v v26, (a2)
+; LMULMAX1-RV32-NEXT: addi a3, a1, 16
+; LMULMAX1-RV32-NEXT: vle64.v v27, (a3)
+; LMULMAX1-RV32-NEXT: vle64.v v28, (a1)
+; LMULMAX1-RV32-NEXT: vminu.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT: vminu.vv v25, v25, v28
+; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
+; LMULMAX1-RV32-NEXT: vse64.v v26, (a2)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: umin_v4i64:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a2, zero, 2
+; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e64,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
+; LMULMAX1-RV64-NEXT: addi a2, a1, 16
+; LMULMAX1-RV64-NEXT: vle64.v v26, (a2)
+; LMULMAX1-RV64-NEXT: addi a2, a0, 16
+; LMULMAX1-RV64-NEXT: vle64.v v27, (a2)
+; LMULMAX1-RV64-NEXT: vle64.v v28, (a1)
+; LMULMAX1-RV64-NEXT: vminu.vv v26, v27, v26
+; LMULMAX1-RV64-NEXT: vminu.vv v25, v25, v28
+; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
+; LMULMAX1-RV64-NEXT: vse64.v v26, (a2)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <4 x i64>, <4 x i64>* %x
+ %b = load <4 x i64>, <4 x i64>* %y
+ %cc = icmp ult <4 x i64> %a, %b
+ %c = select <4 x i1> %cc, <4 x i64> %a, <4 x i64> %b
+ store <4 x i64> %c, <4 x i64>* %x
+ ret void
+}
+
+define void @umax_v32i8(<32 x i8>* %x, <32 x i8>* %y) {
+; LMULMAX2-LABEL: umax_v32i8:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a2, zero, 32
+; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT: vle8.v v26, (a0)
+; LMULMAX2-NEXT: vle8.v v28, (a1)
+; LMULMAX2-NEXT: vmaxu.vv v26, v26, v28
+; LMULMAX2-NEXT: vse8.v v26, (a0)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-RV32-LABEL: umax_v32i8:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a2, zero, 16
+; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle8.v v25, (a0)
+; LMULMAX1-RV32-NEXT: addi a2, a0, 16
+; LMULMAX1-RV32-NEXT: vle8.v v26, (a2)
+; LMULMAX1-RV32-NEXT: addi a3, a1, 16
+; LMULMAX1-RV32-NEXT: vle8.v v27, (a3)
+; LMULMAX1-RV32-NEXT: vle8.v v28, (a1)
+; LMULMAX1-RV32-NEXT: vmaxu.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT: vmaxu.vv v25, v25, v28
+; LMULMAX1-RV32-NEXT: vse8.v v25, (a0)
+; LMULMAX1-RV32-NEXT: vse8.v v26, (a2)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: umax_v32i8:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a2, zero, 16
+; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vle8.v v25, (a0)
+; LMULMAX1-RV64-NEXT: addi a2, a1, 16
+; LMULMAX1-RV64-NEXT: vle8.v v26, (a2)
+; LMULMAX1-RV64-NEXT: addi a2, a0, 16
+; LMULMAX1-RV64-NEXT: vle8.v v27, (a2)
+; LMULMAX1-RV64-NEXT: vle8.v v28, (a1)
+; LMULMAX1-RV64-NEXT: vmaxu.vv v26, v27, v26
+; LMULMAX1-RV64-NEXT: vmaxu.vv v25, v25, v28
+; LMULMAX1-RV64-NEXT: vse8.v v25, (a0)
+; LMULMAX1-RV64-NEXT: vse8.v v26, (a2)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <32 x i8>, <32 x i8>* %x
+ %b = load <32 x i8>, <32 x i8>* %y
+ %cc = icmp ugt <32 x i8> %a, %b
+ %c = select <32 x i1> %cc, <32 x i8> %a, <32 x i8> %b
+ store <32 x i8> %c, <32 x i8>* %x
+ ret void
+}
+
+define void @umax_v16i16(<16 x i16>* %x, <16 x i16>* %y) {
+; LMULMAX2-LABEL: umax_v16i16:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a2, zero, 16
+; LMULMAX2-NEXT: vsetvli a2, a2, e16,m2,ta,mu
+; LMULMAX2-NEXT: vle16.v v26, (a0)
+; LMULMAX2-NEXT: vle16.v v28, (a1)
+; LMULMAX2-NEXT: vmaxu.vv v26, v26, v28
+; LMULMAX2-NEXT: vse16.v v26, (a0)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-RV32-LABEL: umax_v16i16:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a2, zero, 8
+; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle16.v v25, (a0)
+; LMULMAX1-RV32-NEXT: addi a2, a0, 16
+; LMULMAX1-RV32-NEXT: vle16.v v26, (a2)
+; LMULMAX1-RV32-NEXT: addi a3, a1, 16
+; LMULMAX1-RV32-NEXT: vle16.v v27, (a3)
+; LMULMAX1-RV32-NEXT: vle16.v v28, (a1)
+; LMULMAX1-RV32-NEXT: vmaxu.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT: vmaxu.vv v25, v25, v28
+; LMULMAX1-RV32-NEXT: vse16.v v25, (a0)
+; LMULMAX1-RV32-NEXT: vse16.v v26, (a2)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: umax_v16i16:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a2, zero, 8
+; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vle16.v v25, (a0)
+; LMULMAX1-RV64-NEXT: addi a2, a1, 16
+; LMULMAX1-RV64-NEXT: vle16.v v26, (a2)
+; LMULMAX1-RV64-NEXT: addi a2, a0, 16
+; LMULMAX1-RV64-NEXT: vle16.v v27, (a2)
+; LMULMAX1-RV64-NEXT: vle16.v v28, (a1)
+; LMULMAX1-RV64-NEXT: vmaxu.vv v26, v27, v26
+; LMULMAX1-RV64-NEXT: vmaxu.vv v25, v25, v28
+; LMULMAX1-RV64-NEXT: vse16.v v25, (a0)
+; LMULMAX1-RV64-NEXT: vse16.v v26, (a2)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <16 x i16>, <16 x i16>* %x
+ %b = load <16 x i16>, <16 x i16>* %y
+ %cc = icmp ugt <16 x i16> %a, %b
+ %c = select <16 x i1> %cc, <16 x i16> %a, <16 x i16> %b
+ store <16 x i16> %c, <16 x i16>* %x
+ ret void
+}
+
+define void @umax_v8i32(<8 x i32>* %x, <8 x i32>* %y) {
+; LMULMAX2-LABEL: umax_v8i32:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a2, zero, 8
+; LMULMAX2-NEXT: vsetvli a2, a2, e32,m2,ta,mu
+; LMULMAX2-NEXT: vle32.v v26, (a0)
+; LMULMAX2-NEXT: vle32.v v28, (a1)
+; LMULMAX2-NEXT: vmaxu.vv v26, v26, v28
+; LMULMAX2-NEXT: vse32.v v26, (a0)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-RV32-LABEL: umax_v8i32:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a2, zero, 4
+; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle32.v v25, (a0)
+; LMULMAX1-RV32-NEXT: addi a2, a0, 16
+; LMULMAX1-RV32-NEXT: vle32.v v26, (a2)
+; LMULMAX1-RV32-NEXT: addi a3, a1, 16
+; LMULMAX1-RV32-NEXT: vle32.v v27, (a3)
+; LMULMAX1-RV32-NEXT: vle32.v v28, (a1)
+; LMULMAX1-RV32-NEXT: vmaxu.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT: vmaxu.vv v25, v25, v28
+; LMULMAX1-RV32-NEXT: vse32.v v25, (a0)
+; LMULMAX1-RV32-NEXT: vse32.v v26, (a2)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: umax_v8i32:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a2, zero, 4
+; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vle32.v v25, (a0)
+; LMULMAX1-RV64-NEXT: addi a2, a1, 16
+; LMULMAX1-RV64-NEXT: vle32.v v26, (a2)
+; LMULMAX1-RV64-NEXT: addi a2, a0, 16
+; LMULMAX1-RV64-NEXT: vle32.v v27, (a2)
+; LMULMAX1-RV64-NEXT: vle32.v v28, (a1)
+; LMULMAX1-RV64-NEXT: vmaxu.vv v26, v27, v26
+; LMULMAX1-RV64-NEXT: vmaxu.vv v25, v25, v28
+; LMULMAX1-RV64-NEXT: vse32.v v25, (a0)
+; LMULMAX1-RV64-NEXT: vse32.v v26, (a2)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <8 x i32>, <8 x i32>* %x
+ %b = load <8 x i32>, <8 x i32>* %y
+ %cc = icmp ugt <8 x i32> %a, %b
+ %c = select <8 x i1> %cc, <8 x i32> %a, <8 x i32> %b
+ store <8 x i32> %c, <8 x i32>* %x
+ ret void
+}
+
+define void @umax_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
+; LMULMAX2-LABEL: umax_v4i64:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a2, zero, 4
+; LMULMAX2-NEXT: vsetvli a2, a2, e64,m2,ta,mu
+; LMULMAX2-NEXT: vle64.v v26, (a0)
+; LMULMAX2-NEXT: vle64.v v28, (a1)
+; LMULMAX2-NEXT: vmaxu.vv v26, v26, v28
+; LMULMAX2-NEXT: vse64.v v26, (a0)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-RV32-LABEL: umax_v4i64:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a2, zero, 2
+; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
+; LMULMAX1-RV32-NEXT: addi a2, a0, 16
+; LMULMAX1-RV32-NEXT: vle64.v v26, (a2)
+; LMULMAX1-RV32-NEXT: addi a3, a1, 16
+; LMULMAX1-RV32-NEXT: vle64.v v27, (a3)
+; LMULMAX1-RV32-NEXT: vle64.v v28, (a1)
+; LMULMAX1-RV32-NEXT: vmaxu.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT: vmaxu.vv v25, v25, v28
+; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
+; LMULMAX1-RV32-NEXT: vse64.v v26, (a2)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: umax_v4i64:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a2, zero, 2
+; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e64,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
+; LMULMAX1-RV64-NEXT: addi a2, a1, 16
+; LMULMAX1-RV64-NEXT: vle64.v v26, (a2)
+; LMULMAX1-RV64-NEXT: addi a2, a0, 16
+; LMULMAX1-RV64-NEXT: vle64.v v27, (a2)
+; LMULMAX1-RV64-NEXT: vle64.v v28, (a1)
+; LMULMAX1-RV64-NEXT: vmaxu.vv v26, v27, v26
+; LMULMAX1-RV64-NEXT: vmaxu.vv v25, v25, v28
+; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
+; LMULMAX1-RV64-NEXT: vse64.v v26, (a2)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <4 x i64>, <4 x i64>* %x
+ %b = load <4 x i64>, <4 x i64>* %y
+ %cc = icmp ugt <4 x i64> %a, %b
+ %c = select <4 x i1> %cc, <4 x i64> %a, <4 x i64> %b
+ store <4 x i64> %c, <4 x i64>* %x
+ ret void
+}