[llvm] 156fc07 - [RISCV] Add support for fixed vector MULHU/MULHS.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 18 09:15:18 PST 2021
Author: Craig Topper
Date: 2021-02-18T09:15:08-08:00
New Revision: 156fc07e19ae599e638e18e598dbf5c5a4247408
URL: https://github.com/llvm/llvm-project/commit/156fc07e19ae599e638e18e598dbf5c5a4247408
DIFF: https://github.com/llvm/llvm-project/commit/156fc07e19ae599e638e18e598dbf5c5a4247408.diff
LOG: [RISCV] Add support for fixed vector MULHU/MULHS.
This allows the division by constant optimization to use MULHU/MULHS.
Reviewed By: frasercrmck, arcbbb
Differential Revision: https://reviews.llvm.org/D96934
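
As a rough illustration of the effect (this mirrors the mulhu_vx_v4i32 test added
below; the magic constant comes from the generic divide-by-constant lowering), an
unsigned fixed-vector divide by 5 such as

  %a = load <4 x i32>, <4 x i32>* %x
  %b = udiv <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>

is now selected on RV32 as a multiply-high plus shift instead of a vdivu.vv:

  lui       a1, 838861       # a1 = 0xCCCCD000
  addi      a1, a1, -819     # a1 = 0xCCCCCCCD = ceil(2^34 / 5)
  vmulhu.vx v25, v25, a1     # high 32 bits of a * 0xCCCCCCCD
  vsrl.vi   v25, v25, 2      # (a * 0xCCCCCCCD) >> 34 == a / 5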
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.h
llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 0ae8085a2f1f..9576d3cbd6ed 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -559,6 +559,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UMIN, VT, Custom);
setOperationAction(ISD::UMAX, VT, Custom);
+ setOperationAction(ISD::MULHS, VT, Custom);
+ setOperationAction(ISD::MULHU, VT, Custom);
+
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
@@ -1219,6 +1222,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL);
case ISD::MUL:
return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
+ case ISD::MULHS:
+ return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL);
+ case ISD::MULHU:
+ return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL);
case ISD::AND:
return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
RISCVISD::AND_VL);
@@ -4968,6 +4975,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(SMAX_VL)
NODE_NAME_CASE(UMIN_VL)
NODE_NAME_CASE(UMAX_VL)
+ NODE_NAME_CASE(MULHS_VL)
+ NODE_NAME_CASE(MULHU_VL)
NODE_NAME_CASE(SETCC_VL)
NODE_NAME_CASE(VSELECT_VL)
NODE_NAME_CASE(VMAND_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index b91437a975ba..8d761d26e06e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -170,6 +170,8 @@ enum NodeType : unsigned {
SMAX_VL,
UMIN_VL,
UMAX_VL,
+ MULHS_VL,
+ MULHU_VL,
// Vector compare producing a mask. Fourth operand is input mask. Fifth
// operand is VL.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index d6db0ccd1fa1..b7c08d5b6cbd 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -63,6 +63,8 @@ def riscv_vse_vl : SDNode<"RISCVISD::VSE_VL", SDT_RISCVVSE_VL,
def riscv_add_vl : SDNode<"RISCVISD::ADD_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_sub_vl : SDNode<"RISCVISD::SUB_VL", SDT_RISCVIntBinOp_VL>;
def riscv_mul_vl : SDNode<"RISCVISD::MUL_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
+def riscv_mulhs_vl : SDNode<"RISCVISD::MULHS_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
+def riscv_mulhu_vl : SDNode<"RISCVISD::MULHU_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_and_vl : SDNode<"RISCVISD::AND_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_or_vl : SDNode<"RISCVISD::OR_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_xor_vl : SDNode<"RISCVISD::XOR_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
@@ -443,6 +445,8 @@ defm "" : VPatBinaryVL_VV_VX<riscv_smax_vl, "PseudoVMAX">;
// 12.10. Vector Single-Width Integer Multiply Instructions
defm "" : VPatBinaryVL_VV_VX<riscv_mul_vl, "PseudoVMUL">;
+defm "" : VPatBinaryVL_VV_VX<riscv_mulhs_vl, "PseudoVMULH">;
+defm "" : VPatBinaryVL_VV_VX<riscv_mulhu_vl, "PseudoVMULHU">;
// 12.11. Vector Integer Divide Instructions
defm "" : VPatBinaryVL_VV_VX<riscv_udiv_vl, "PseudoVDIVU">;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index 59037541f0ad..6a8b0611c248 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -888,6 +888,283 @@ define void @urem_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
ret void
}
+define void @mulhu_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: mulhu_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 16
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: lui a1, %hi(.LCPI52_0)
+; CHECK-NEXT: addi a1, a1, %lo(.LCPI52_0)
+; CHECK-NEXT: vle8.v v26, (a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI52_1)
+; CHECK-NEXT: addi a1, a1, %lo(.LCPI52_1)
+; CHECK-NEXT: vle8.v v27, (a1)
+; CHECK-NEXT: vsrl.vv v26, v25, v26
+; CHECK-NEXT: vmulhu.vv v26, v26, v27
+; CHECK-NEXT: lui a1, %hi(.LCPI52_2)
+; CHECK-NEXT: addi a1, a1, %lo(.LCPI52_2)
+; CHECK-NEXT: vle8.v v27, (a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI52_3)
+; CHECK-NEXT: addi a1, a1, %lo(.LCPI52_3)
+; CHECK-NEXT: vle8.v v28, (a1)
+; CHECK-NEXT: vsub.vv v25, v25, v26
+; CHECK-NEXT: vmulhu.vv v25, v25, v27
+; CHECK-NEXT: vadd.vv v25, v25, v26
+; CHECK-NEXT: vsrl.vv v25, v25, v28
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = udiv <16 x i8> %a, <i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25>
+ store <16 x i8> %b, <16 x i8>* %x
+ ret void
+}
+
+define void @mulhu_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: mulhu_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 8
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: lui a1, %hi(.LCPI53_0)
+; CHECK-NEXT: addi a1, a1, %lo(.LCPI53_0)
+; CHECK-NEXT: vle16.v v26, (a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI53_1)
+; CHECK-NEXT: addi a1, a1, %lo(.LCPI53_1)
+; CHECK-NEXT: vle16.v v27, (a1)
+; CHECK-NEXT: vsrl.vv v26, v25, v26
+; CHECK-NEXT: vmulhu.vv v26, v26, v27
+; CHECK-NEXT: lui a1, %hi(.LCPI53_2)
+; CHECK-NEXT: addi a1, a1, %lo(.LCPI53_2)
+; CHECK-NEXT: vle16.v v27, (a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI53_3)
+; CHECK-NEXT: addi a1, a1, %lo(.LCPI53_3)
+; CHECK-NEXT: vle16.v v28, (a1)
+; CHECK-NEXT: vsub.vv v25, v25, v26
+; CHECK-NEXT: vmulhu.vv v25, v25, v27
+; CHECK-NEXT: vadd.vv v25, v25, v26
+; CHECK-NEXT: vsrl.vv v25, v25, v28
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = udiv <8 x i16> %a, <i16 7, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
+ store <8 x i16> %b, <8 x i16>* %x
+ ret void
+}
+
+define void @mulhu_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: mulhu_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 4
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: lui a1, %hi(.LCPI54_0)
+; CHECK-NEXT: addi a1, a1, %lo(.LCPI54_0)
+; CHECK-NEXT: vle32.v v26, (a1)
+; CHECK-NEXT: vmulhu.vv v26, v25, v26
+; CHECK-NEXT: lui a1, %hi(.LCPI54_1)
+; CHECK-NEXT: addi a1, a1, %lo(.LCPI54_1)
+; CHECK-NEXT: vle32.v v27, (a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI54_2)
+; CHECK-NEXT: addi a1, a1, %lo(.LCPI54_2)
+; CHECK-NEXT: vle32.v v28, (a1)
+; CHECK-NEXT: vsub.vv v25, v25, v26
+; CHECK-NEXT: vmulhu.vv v25, v25, v27
+; CHECK-NEXT: vadd.vv v25, v25, v26
+; CHECK-NEXT: vsrl.vv v25, v25, v28
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = udiv <4 x i32> %a, <i32 5, i32 6, i32 7, i32 9>
+ store <4 x i32> %b, <4 x i32>* %x
+ ret void
+}
+
+define void @mulhu_v2i64(<2 x i64>* %x) {
+; LMULMAX1-RV32-LABEL: mulhu_v2i64:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a1, zero, 2
+; LMULMAX1-RV32-NEXT: vsetvli a2, a1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
+; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI55_0)
+; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI55_0)
+; LMULMAX1-RV32-NEXT: addi a3, zero, 4
+; LMULMAX1-RV32-NEXT: vsetvli a4, a3, e32,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle32.v v26, (a2)
+; LMULMAX1-RV32-NEXT: vsetvli a2, a1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vmulhu.vv v25, v25, v26
+; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI55_1)
+; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI55_1)
+; LMULMAX1-RV32-NEXT: vsetvli a3, a3, e32,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle32.v v26, (a2)
+; LMULMAX1-RV32-NEXT: vsetvli a1, a1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v26
+; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: mulhu_v2i64:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a1, zero, 2
+; LMULMAX1-RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
+; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI55_0)
+; LMULMAX1-RV64-NEXT: addi a1, a1, %lo(.LCPI55_0)
+; LMULMAX1-RV64-NEXT: vle64.v v26, (a1)
+; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI55_1)
+; LMULMAX1-RV64-NEXT: addi a1, a1, %lo(.LCPI55_1)
+; LMULMAX1-RV64-NEXT: vle64.v v27, (a1)
+; LMULMAX1-RV64-NEXT: vmulhu.vv v25, v25, v26
+; LMULMAX1-RV64-NEXT: vsrl.vv v25, v25, v27
+; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <2 x i64>, <2 x i64>* %x
+ %b = udiv <2 x i64> %a, <i64 3, i64 5>
+ store <2 x i64> %b, <2 x i64>* %x
+ ret void
+}
+
+define void @mulhs_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: mulhs_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 16
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: lui a1, %hi(.LCPI56_0)
+; CHECK-NEXT: addi a1, a1, %lo(.LCPI56_0)
+; CHECK-NEXT: vle8.v v26, (a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI56_1)
+; CHECK-NEXT: addi a1, a1, %lo(.LCPI56_1)
+; CHECK-NEXT: vle8.v v27, (a1)
+; CHECK-NEXT: vmulhu.vv v25, v25, v26
+; CHECK-NEXT: vsrl.vv v25, v25, v27
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = udiv <16 x i8> %a, <i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9>
+ store <16 x i8> %b, <16 x i8>* %x
+ ret void
+}
+
+define void @mulhs_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: mulhs_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 8
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: lui a1, %hi(.LCPI57_0)
+; CHECK-NEXT: addi a1, a1, %lo(.LCPI57_0)
+; CHECK-NEXT: vle16.v v26, (a1)
+; CHECK-NEXT: vmulh.vv v25, v25, v26
+; CHECK-NEXT: vsra.vi v25, v25, 1
+; CHECK-NEXT: vsrl.vi v26, v25, 15
+; CHECK-NEXT: vadd.vv v25, v25, v26
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = sdiv <8 x i16> %a, <i16 -7, i16 7, i16 7, i16 -7, i16 7, i16 -7, i16 -7, i16 7>
+ store <8 x i16> %b, <8 x i16>* %x
+ ret void
+}
+
+define void @mulhs_v4i32(<4 x i32>* %x) {
+; LMULMAX1-RV32-LABEL: mulhs_v4i32:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a1, zero, 4
+; LMULMAX1-RV32-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle32.v v25, (a0)
+; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI58_0)
+; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI58_0)
+; LMULMAX1-RV32-NEXT: vle32.v v26, (a1)
+; LMULMAX1-RV32-NEXT: vmulh.vv v25, v25, v26
+; LMULMAX1-RV32-NEXT: vsrl.vi v26, v25, 31
+; LMULMAX1-RV32-NEXT: vsra.vi v25, v25, 1
+; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v26
+; LMULMAX1-RV32-NEXT: vse32.v v25, (a0)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: mulhs_v4i32:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a1, zero, 4
+; LMULMAX1-RV64-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vle32.v v25, (a0)
+; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI58_0)
+; LMULMAX1-RV64-NEXT: addi a1, a1, %lo(.LCPI58_0)
+; LMULMAX1-RV64-NEXT: vle32.v v26, (a1)
+; LMULMAX1-RV64-NEXT: vmulh.vv v25, v25, v26
+; LMULMAX1-RV64-NEXT: vsra.vi v25, v25, 1
+; LMULMAX1-RV64-NEXT: vsrl.vi v26, v25, 31
+; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v26
+; LMULMAX1-RV64-NEXT: vse32.v v25, (a0)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = sdiv <4 x i32> %a, <i32 -5, i32 5, i32 -5, i32 5>
+ store <4 x i32> %b, <4 x i32>* %x
+ ret void
+}
+
+define void @mulhs_v2i64(<2 x i64>* %x) {
+; LMULMAX1-RV32-LABEL: mulhs_v2i64:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a1, zero, 2
+; LMULMAX1-RV32-NEXT: vsetvli a2, a1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
+; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI59_0)
+; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI59_0)
+; LMULMAX1-RV32-NEXT: addi a3, zero, 4
+; LMULMAX1-RV32-NEXT: vsetvli a4, a3, e32,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle32.v v26, (a2)
+; LMULMAX1-RV32-NEXT: vsetvli a2, a1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vmul.vv v26, v25, v26
+; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI59_1)
+; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI59_1)
+; LMULMAX1-RV32-NEXT: vsetvli a4, a3, e32,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle32.v v27, (a2)
+; LMULMAX1-RV32-NEXT: vsetvli a2, a1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vmulh.vv v25, v25, v27
+; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v26
+; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI59_2)
+; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI59_2)
+; LMULMAX1-RV32-NEXT: vsetvli a4, a3, e32,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle32.v v26, (a2)
+; LMULMAX1-RV32-NEXT: vsetvli a2, a1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vsrl.vv v26, v25, v26
+; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI59_3)
+; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI59_3)
+; LMULMAX1-RV32-NEXT: vsetvli a3, a3, e32,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle32.v v27, (a2)
+; LMULMAX1-RV32-NEXT: vsetvli a1, a1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vsra.vv v25, v25, v27
+; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v26
+; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: mulhs_v2i64:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a1, zero, 2
+; LMULMAX1-RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
+; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI59_0)
+; LMULMAX1-RV64-NEXT: addi a1, a1, %lo(.LCPI59_0)
+; LMULMAX1-RV64-NEXT: vle64.v v26, (a1)
+; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI59_1)
+; LMULMAX1-RV64-NEXT: addi a1, a1, %lo(.LCPI59_1)
+; LMULMAX1-RV64-NEXT: vle64.v v27, (a1)
+; LMULMAX1-RV64-NEXT: vmul.vv v26, v25, v26
+; LMULMAX1-RV64-NEXT: vmulh.vv v25, v25, v27
+; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v26
+; LMULMAX1-RV64-NEXT: addi a1, zero, 63
+; LMULMAX1-RV64-NEXT: vsrl.vx v26, v25, a1
+; LMULMAX1-RV64-NEXT: vid.v v27
+; LMULMAX1-RV64-NEXT: vsra.vv v25, v25, v27
+; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v26
+; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <2 x i64>, <2 x i64>* %x
+ %b = sdiv <2 x i64> %a, <i64 3, i64 -3>
+ store <2 x i64> %b, <2 x i64>* %x
+ ret void
+}
+
define void @smin_v16i8(<16 x i8>* %x, <16 x i8>* %y) {
; CHECK-LABEL: smin_v16i8:
; CHECK: # %bb.0:
@@ -3778,6 +4055,478 @@ define void @extract_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
ret void
}
+define void @mulhu_v32i8(<32 x i8>* %x) {
+; LMULMAX2-LABEL: mulhu_v32i8:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a1, zero, 32
+; LMULMAX2-NEXT: vsetvli a1, a1, e8,m2,ta,mu
+; LMULMAX2-NEXT: vle8.v v26, (a0)
+; LMULMAX2-NEXT: lui a1, %hi(.LCPI129_0)
+; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI129_0)
+; LMULMAX2-NEXT: vle8.v v28, (a1)
+; LMULMAX2-NEXT: lui a1, %hi(.LCPI129_1)
+; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI129_1)
+; LMULMAX2-NEXT: vle8.v v30, (a1)
+; LMULMAX2-NEXT: vsrl.vv v28, v26, v28
+; LMULMAX2-NEXT: vmulhu.vv v28, v28, v30
+; LMULMAX2-NEXT: lui a1, %hi(.LCPI129_2)
+; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI129_2)
+; LMULMAX2-NEXT: vle8.v v30, (a1)
+; LMULMAX2-NEXT: lui a1, %hi(.LCPI129_3)
+; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI129_3)
+; LMULMAX2-NEXT: vle8.v v8, (a1)
+; LMULMAX2-NEXT: vsub.vv v26, v26, v28
+; LMULMAX2-NEXT: vmulhu.vv v26, v26, v30
+; LMULMAX2-NEXT: vadd.vv v26, v26, v28
+; LMULMAX2-NEXT: vsrl.vv v26, v26, v8
+; LMULMAX2-NEXT: vse8.v v26, (a0)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-RV32-LABEL: mulhu_v32i8:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a1, zero, 16
+; LMULMAX1-RV32-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; LMULMAX1-RV32-NEXT: addi a1, a0, 16
+; LMULMAX1-RV32-NEXT: vle8.v v25, (a1)
+; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI129_0)
+; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI129_0)
+; LMULMAX1-RV32-NEXT: vle8.v v26, (a2)
+; LMULMAX1-RV32-NEXT: vle8.v v27, (a0)
+; LMULMAX1-RV32-NEXT: vdivu.vv v25, v25, v26
+; LMULMAX1-RV32-NEXT: vdivu.vv v26, v27, v26
+; LMULMAX1-RV32-NEXT: vse8.v v26, (a0)
+; LMULMAX1-RV32-NEXT: vse8.v v25, (a1)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: mulhu_v32i8:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a1, zero, 16
+; LMULMAX1-RV64-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; LMULMAX1-RV64-NEXT: addi a1, a0, 16
+; LMULMAX1-RV64-NEXT: vle8.v v25, (a1)
+; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI129_0)
+; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI129_0)
+; LMULMAX1-RV64-NEXT: vle8.v v26, (a2)
+; LMULMAX1-RV64-NEXT: vle8.v v27, (a0)
+; LMULMAX1-RV64-NEXT: vdivu.vv v25, v25, v26
+; LMULMAX1-RV64-NEXT: vdivu.vv v26, v27, v26
+; LMULMAX1-RV64-NEXT: vse8.v v26, (a0)
+; LMULMAX1-RV64-NEXT: vse8.v v25, (a1)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <32 x i8>, <32 x i8>* %x
+ %b = udiv <32 x i8> %a, <i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25>
+ store <32 x i8> %b, <32 x i8>* %x
+ ret void
+}
+
+define void @mulhu_v16i16(<16 x i16>* %x) {
+; LMULMAX2-LABEL: mulhu_v16i16:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a1, zero, 16
+; LMULMAX2-NEXT: vsetvli a1, a1, e16,m2,ta,mu
+; LMULMAX2-NEXT: vle16.v v26, (a0)
+; LMULMAX2-NEXT: lui a1, %hi(.LCPI130_0)
+; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI130_0)
+; LMULMAX2-NEXT: vle16.v v28, (a1)
+; LMULMAX2-NEXT: lui a1, %hi(.LCPI130_1)
+; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI130_1)
+; LMULMAX2-NEXT: vle16.v v30, (a1)
+; LMULMAX2-NEXT: vsrl.vv v28, v26, v28
+; LMULMAX2-NEXT: vmulhu.vv v28, v28, v30
+; LMULMAX2-NEXT: lui a1, %hi(.LCPI130_2)
+; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI130_2)
+; LMULMAX2-NEXT: vle16.v v30, (a1)
+; LMULMAX2-NEXT: lui a1, %hi(.LCPI130_3)
+; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI130_3)
+; LMULMAX2-NEXT: vle16.v v8, (a1)
+; LMULMAX2-NEXT: vsub.vv v26, v26, v28
+; LMULMAX2-NEXT: vmulhu.vv v26, v26, v30
+; LMULMAX2-NEXT: vadd.vv v26, v26, v28
+; LMULMAX2-NEXT: vsrl.vv v26, v26, v8
+; LMULMAX2-NEXT: vse16.v v26, (a0)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-RV32-LABEL: mulhu_v16i16:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a1, zero, 8
+; LMULMAX1-RV32-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; LMULMAX1-RV32-NEXT: addi a1, a0, 16
+; LMULMAX1-RV32-NEXT: vle16.v v25, (a1)
+; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI130_0)
+; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI130_0)
+; LMULMAX1-RV32-NEXT: vle16.v v26, (a2)
+; LMULMAX1-RV32-NEXT: vle16.v v27, (a0)
+; LMULMAX1-RV32-NEXT: vdivu.vv v25, v25, v26
+; LMULMAX1-RV32-NEXT: vdivu.vv v26, v27, v26
+; LMULMAX1-RV32-NEXT: vse16.v v26, (a0)
+; LMULMAX1-RV32-NEXT: vse16.v v25, (a1)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: mulhu_v16i16:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a1, zero, 8
+; LMULMAX1-RV64-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; LMULMAX1-RV64-NEXT: addi a1, a0, 16
+; LMULMAX1-RV64-NEXT: vle16.v v25, (a1)
+; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI130_0)
+; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI130_0)
+; LMULMAX1-RV64-NEXT: vle16.v v26, (a2)
+; LMULMAX1-RV64-NEXT: vle16.v v27, (a0)
+; LMULMAX1-RV64-NEXT: vdivu.vv v25, v25, v26
+; LMULMAX1-RV64-NEXT: vdivu.vv v26, v27, v26
+; LMULMAX1-RV64-NEXT: vse16.v v26, (a0)
+; LMULMAX1-RV64-NEXT: vse16.v v25, (a1)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <16 x i16>, <16 x i16>* %x
+ %b = udiv <16 x i16> %a, <i16 7, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 7, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
+ store <16 x i16> %b, <16 x i16>* %x
+ ret void
+}
+
+define void @mulhu_v8i32(<8 x i32>* %x) {
+; LMULMAX2-LABEL: mulhu_v8i32:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a1, zero, 8
+; LMULMAX2-NEXT: vsetvli a1, a1, e32,m2,ta,mu
+; LMULMAX2-NEXT: vle32.v v26, (a0)
+; LMULMAX2-NEXT: lui a1, %hi(.LCPI131_0)
+; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI131_0)
+; LMULMAX2-NEXT: vle32.v v28, (a1)
+; LMULMAX2-NEXT: vmulhu.vv v28, v26, v28
+; LMULMAX2-NEXT: lui a1, %hi(.LCPI131_1)
+; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI131_1)
+; LMULMAX2-NEXT: vle32.v v30, (a1)
+; LMULMAX2-NEXT: lui a1, %hi(.LCPI131_2)
+; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI131_2)
+; LMULMAX2-NEXT: vle32.v v8, (a1)
+; LMULMAX2-NEXT: vsub.vv v26, v26, v28
+; LMULMAX2-NEXT: vmulhu.vv v26, v26, v30
+; LMULMAX2-NEXT: vadd.vv v26, v26, v28
+; LMULMAX2-NEXT: vsrl.vv v26, v26, v8
+; LMULMAX2-NEXT: vse32.v v26, (a0)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-RV32-LABEL: mulhu_v8i32:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a1, zero, 4
+; LMULMAX1-RV32-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; LMULMAX1-RV32-NEXT: addi a1, a0, 16
+; LMULMAX1-RV32-NEXT: vle32.v v25, (a1)
+; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI131_0)
+; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI131_0)
+; LMULMAX1-RV32-NEXT: vle32.v v26, (a2)
+; LMULMAX1-RV32-NEXT: vle32.v v27, (a0)
+; LMULMAX1-RV32-NEXT: vmulhu.vv v28, v25, v26
+; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI131_1)
+; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI131_1)
+; LMULMAX1-RV32-NEXT: vle32.v v29, (a2)
+; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI131_2)
+; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI131_2)
+; LMULMAX1-RV32-NEXT: vle32.v v30, (a2)
+; LMULMAX1-RV32-NEXT: vsub.vv v25, v25, v28
+; LMULMAX1-RV32-NEXT: vmulhu.vv v25, v25, v29
+; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v28
+; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v30
+; LMULMAX1-RV32-NEXT: vmulhu.vv v26, v27, v26
+; LMULMAX1-RV32-NEXT: vsub.vv v27, v27, v26
+; LMULMAX1-RV32-NEXT: vmulhu.vv v27, v27, v29
+; LMULMAX1-RV32-NEXT: vadd.vv v26, v27, v26
+; LMULMAX1-RV32-NEXT: vsrl.vv v26, v26, v30
+; LMULMAX1-RV32-NEXT: vse32.v v26, (a0)
+; LMULMAX1-RV32-NEXT: vse32.v v25, (a1)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: mulhu_v8i32:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a1, zero, 4
+; LMULMAX1-RV64-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; LMULMAX1-RV64-NEXT: addi a1, a0, 16
+; LMULMAX1-RV64-NEXT: vle32.v v25, (a1)
+; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI131_0)
+; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI131_0)
+; LMULMAX1-RV64-NEXT: vle32.v v26, (a2)
+; LMULMAX1-RV64-NEXT: vle32.v v27, (a0)
+; LMULMAX1-RV64-NEXT: vdivu.vv v25, v25, v26
+; LMULMAX1-RV64-NEXT: vdivu.vv v26, v27, v26
+; LMULMAX1-RV64-NEXT: vse32.v v26, (a0)
+; LMULMAX1-RV64-NEXT: vse32.v v25, (a1)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <8 x i32>, <8 x i32>* %x
+ %b = udiv <8 x i32> %a, <i32 5, i32 6, i32 7, i32 9, i32 5, i32 6, i32 7, i32 9>
+ store <8 x i32> %b, <8 x i32>* %x
+ ret void
+}
+
+define void @mulhu_v4i64(<4 x i64>* %x) {
+; LMULMAX1-RV32-LABEL: mulhu_v4i64:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a1, zero, 2
+; LMULMAX1-RV32-NEXT: vsetvli a2, a1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
+; LMULMAX1-RV32-NEXT: addi a2, a0, 16
+; LMULMAX1-RV32-NEXT: vle64.v v26, (a2)
+; LMULMAX1-RV32-NEXT: lui a3, %hi(.LCPI132_0)
+; LMULMAX1-RV32-NEXT: addi a3, a3, %lo(.LCPI132_0)
+; LMULMAX1-RV32-NEXT: addi a4, zero, 4
+; LMULMAX1-RV32-NEXT: vsetvli a5, a4, e32,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle32.v v27, (a3)
+; LMULMAX1-RV32-NEXT: vsetvli a3, a1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vdivu.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT: lui a3, %hi(.LCPI132_1)
+; LMULMAX1-RV32-NEXT: addi a3, a3, %lo(.LCPI132_1)
+; LMULMAX1-RV32-NEXT: vsetvli a4, a4, e32,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle32.v v27, (a3)
+; LMULMAX1-RV32-NEXT: vsetvli a1, a1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vdivu.vv v25, v25, v27
+; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
+; LMULMAX1-RV32-NEXT: vse64.v v26, (a2)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: mulhu_v4i64:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a1, zero, 2
+; LMULMAX1-RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu
+; LMULMAX1-RV64-NEXT: addi a1, a0, 16
+; LMULMAX1-RV64-NEXT: vle64.v v25, (a1)
+; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI132_0)
+; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI132_0)
+; LMULMAX1-RV64-NEXT: vle64.v v26, (a2)
+; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI132_1)
+; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI132_1)
+; LMULMAX1-RV64-NEXT: vle64.v v27, (a2)
+; LMULMAX1-RV64-NEXT: vle64.v v28, (a0)
+; LMULMAX1-RV64-NEXT: vmulhu.vv v26, v25, v26
+; LMULMAX1-RV64-NEXT: vsub.vv v25, v25, v26
+; LMULMAX1-RV64-NEXT: vmulhu.vv v25, v25, v27
+; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI132_2)
+; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI132_2)
+; LMULMAX1-RV64-NEXT: vle64.v v27, (a2)
+; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI132_3)
+; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI132_3)
+; LMULMAX1-RV64-NEXT: vle64.v v29, (a2)
+; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI132_4)
+; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI132_4)
+; LMULMAX1-RV64-NEXT: vle64.v v30, (a2)
+; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v26
+; LMULMAX1-RV64-NEXT: vsrl.vv v25, v25, v27
+; LMULMAX1-RV64-NEXT: vmulhu.vv v26, v28, v29
+; LMULMAX1-RV64-NEXT: vsrl.vv v26, v26, v30
+; LMULMAX1-RV64-NEXT: vse64.v v26, (a0)
+; LMULMAX1-RV64-NEXT: vse64.v v25, (a1)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <4 x i64>, <4 x i64>* %x
+ %b = udiv <4 x i64> %a, <i64 3, i64 5, i64 7, i64 9>
+ store <4 x i64> %b, <4 x i64>* %x
+ ret void
+}
+
+define void @mulhs_v32i8(<32 x i8>* %x) {
+; LMULMAX2-LABEL: mulhs_v32i8:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a1, zero, 32
+; LMULMAX2-NEXT: vsetvli a1, a1, e8,m2,ta,mu
+; LMULMAX2-NEXT: vle8.v v26, (a0)
+; LMULMAX2-NEXT: lui a1, %hi(.LCPI133_0)
+; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI133_0)
+; LMULMAX2-NEXT: vle8.v v28, (a1)
+; LMULMAX2-NEXT: lui a1, %hi(.LCPI133_1)
+; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI133_1)
+; LMULMAX2-NEXT: vle8.v v30, (a1)
+; LMULMAX2-NEXT: vmulhu.vv v26, v26, v28
+; LMULMAX2-NEXT: vsrl.vv v26, v26, v30
+; LMULMAX2-NEXT: vse8.v v26, (a0)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-RV32-LABEL: mulhs_v32i8:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a1, zero, 16
+; LMULMAX1-RV32-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; LMULMAX1-RV32-NEXT: addi a1, a0, 16
+; LMULMAX1-RV32-NEXT: vle8.v v25, (a1)
+; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI133_0)
+; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI133_0)
+; LMULMAX1-RV32-NEXT: vle8.v v26, (a2)
+; LMULMAX1-RV32-NEXT: vle8.v v27, (a0)
+; LMULMAX1-RV32-NEXT: vdivu.vv v25, v25, v26
+; LMULMAX1-RV32-NEXT: vdivu.vv v26, v27, v26
+; LMULMAX1-RV32-NEXT: vse8.v v26, (a0)
+; LMULMAX1-RV32-NEXT: vse8.v v25, (a1)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: mulhs_v32i8:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a1, zero, 16
+; LMULMAX1-RV64-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; LMULMAX1-RV64-NEXT: addi a1, a0, 16
+; LMULMAX1-RV64-NEXT: vle8.v v25, (a1)
+; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI133_0)
+; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI133_0)
+; LMULMAX1-RV64-NEXT: vle8.v v26, (a2)
+; LMULMAX1-RV64-NEXT: vle8.v v27, (a0)
+; LMULMAX1-RV64-NEXT: vdivu.vv v25, v25, v26
+; LMULMAX1-RV64-NEXT: vdivu.vv v26, v27, v26
+; LMULMAX1-RV64-NEXT: vse8.v v26, (a0)
+; LMULMAX1-RV64-NEXT: vse8.v v25, (a1)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <32 x i8>, <32 x i8>* %x
+ %b = udiv <32 x i8> %a, <i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9>
+ store <32 x i8> %b, <32 x i8>* %x
+ ret void
+}
+
+define void @mulhs_v16i16(<16 x i16>* %x) {
+; LMULMAX2-LABEL: mulhs_v16i16:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a1, zero, 16
+; LMULMAX2-NEXT: vsetvli a1, a1, e16,m2,ta,mu
+; LMULMAX2-NEXT: vle16.v v26, (a0)
+; LMULMAX2-NEXT: lui a1, %hi(.LCPI134_0)
+; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI134_0)
+; LMULMAX2-NEXT: vle16.v v28, (a1)
+; LMULMAX2-NEXT: vmulh.vv v26, v26, v28
+; LMULMAX2-NEXT: vsra.vi v26, v26, 1
+; LMULMAX2-NEXT: vsrl.vi v28, v26, 15
+; LMULMAX2-NEXT: vadd.vv v26, v26, v28
+; LMULMAX2-NEXT: vse16.v v26, (a0)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-RV32-LABEL: mulhs_v16i16:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a1, zero, 8
+; LMULMAX1-RV32-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; LMULMAX1-RV32-NEXT: addi a1, a0, 16
+; LMULMAX1-RV32-NEXT: vle16.v v25, (a1)
+; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI134_0)
+; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI134_0)
+; LMULMAX1-RV32-NEXT: vle16.v v26, (a2)
+; LMULMAX1-RV32-NEXT: vle16.v v27, (a0)
+; LMULMAX1-RV32-NEXT: vdiv.vv v25, v25, v26
+; LMULMAX1-RV32-NEXT: vdiv.vv v26, v27, v26
+; LMULMAX1-RV32-NEXT: vse16.v v26, (a0)
+; LMULMAX1-RV32-NEXT: vse16.v v25, (a1)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: mulhs_v16i16:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a1, zero, 8
+; LMULMAX1-RV64-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; LMULMAX1-RV64-NEXT: addi a1, a0, 16
+; LMULMAX1-RV64-NEXT: vle16.v v25, (a1)
+; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI134_0)
+; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI134_0)
+; LMULMAX1-RV64-NEXT: vle16.v v26, (a2)
+; LMULMAX1-RV64-NEXT: vle16.v v27, (a0)
+; LMULMAX1-RV64-NEXT: vdiv.vv v25, v25, v26
+; LMULMAX1-RV64-NEXT: vdiv.vv v26, v27, v26
+; LMULMAX1-RV64-NEXT: vse16.v v26, (a0)
+; LMULMAX1-RV64-NEXT: vse16.v v25, (a1)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <16 x i16>, <16 x i16>* %x
+ %b = sdiv <16 x i16> %a, <i16 -7, i16 7, i16 7, i16 -7, i16 7, i16 -7, i16 -7, i16 7, i16 -7, i16 7, i16 7, i16 -7, i16 7, i16 -7, i16 -7, i16 7>
+ store <16 x i16> %b, <16 x i16>* %x
+ ret void
+}
+
+define void @mulhs_v8i32(<8 x i32>* %x) {
+; LMULMAX1-RV32-LABEL: mulhs_v8i32:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a1, zero, 4
+; LMULMAX1-RV32-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; LMULMAX1-RV32-NEXT: addi a1, a0, 16
+; LMULMAX1-RV32-NEXT: vle32.v v25, (a1)
+; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI135_0)
+; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI135_0)
+; LMULMAX1-RV32-NEXT: vle32.v v26, (a2)
+; LMULMAX1-RV32-NEXT: vle32.v v27, (a0)
+; LMULMAX1-RV32-NEXT: vmulh.vv v25, v25, v26
+; LMULMAX1-RV32-NEXT: vsrl.vi v28, v25, 31
+; LMULMAX1-RV32-NEXT: vsra.vi v25, v25, 1
+; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v28
+; LMULMAX1-RV32-NEXT: vmulh.vv v26, v27, v26
+; LMULMAX1-RV32-NEXT: vsrl.vi v27, v26, 31
+; LMULMAX1-RV32-NEXT: vsra.vi v26, v26, 1
+; LMULMAX1-RV32-NEXT: vadd.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT: vse32.v v26, (a0)
+; LMULMAX1-RV32-NEXT: vse32.v v25, (a1)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: mulhs_v8i32:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a1, zero, 4
+; LMULMAX1-RV64-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; LMULMAX1-RV64-NEXT: addi a1, a0, 16
+; LMULMAX1-RV64-NEXT: vle32.v v25, (a1)
+; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI135_0)
+; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI135_0)
+; LMULMAX1-RV64-NEXT: vle32.v v26, (a2)
+; LMULMAX1-RV64-NEXT: vle32.v v27, (a0)
+; LMULMAX1-RV64-NEXT: vdiv.vv v25, v25, v26
+; LMULMAX1-RV64-NEXT: vdiv.vv v26, v27, v26
+; LMULMAX1-RV64-NEXT: vse32.v v26, (a0)
+; LMULMAX1-RV64-NEXT: vse32.v v25, (a1)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <8 x i32>, <8 x i32>* %x
+ %b = sdiv <8 x i32> %a, <i32 -5, i32 5, i32 -5, i32 5, i32 -5, i32 5, i32 -5, i32 5>
+ store <8 x i32> %b, <8 x i32>* %x
+ ret void
+}
+
+define void @mulhs_v4i64(<4 x i64>* %x) {
+; LMULMAX1-RV32-LABEL: mulhs_v4i64:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a1, zero, 2
+; LMULMAX1-RV32-NEXT: vsetvli a2, a1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
+; LMULMAX1-RV32-NEXT: addi a2, a0, 16
+; LMULMAX1-RV32-NEXT: vle64.v v26, (a2)
+; LMULMAX1-RV32-NEXT: lui a3, %hi(.LCPI136_0)
+; LMULMAX1-RV32-NEXT: addi a3, a3, %lo(.LCPI136_0)
+; LMULMAX1-RV32-NEXT: addi a4, zero, 4
+; LMULMAX1-RV32-NEXT: vsetvli a4, a4, e32,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle32.v v27, (a3)
+; LMULMAX1-RV32-NEXT: vsetvli a1, a1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vdiv.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT: vdiv.vv v25, v25, v27
+; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
+; LMULMAX1-RV32-NEXT: vse64.v v26, (a2)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: mulhs_v4i64:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a1, zero, 2
+; LMULMAX1-RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu
+; LMULMAX1-RV64-NEXT: addi a1, a0, 16
+; LMULMAX1-RV64-NEXT: vle64.v v25, (a1)
+; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI136_0)
+; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI136_0)
+; LMULMAX1-RV64-NEXT: vle64.v v26, (a2)
+; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI136_1)
+; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI136_1)
+; LMULMAX1-RV64-NEXT: vle64.v v27, (a2)
+; LMULMAX1-RV64-NEXT: vle64.v v28, (a0)
+; LMULMAX1-RV64-NEXT: vmul.vv v29, v25, v26
+; LMULMAX1-RV64-NEXT: vmulh.vv v25, v25, v27
+; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v29
+; LMULMAX1-RV64-NEXT: addi a2, zero, 63
+; LMULMAX1-RV64-NEXT: vsrl.vx v29, v25, a2
+; LMULMAX1-RV64-NEXT: vid.v v30
+; LMULMAX1-RV64-NEXT: vsra.vv v25, v25, v30
+; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v29
+; LMULMAX1-RV64-NEXT: vmul.vv v26, v28, v26
+; LMULMAX1-RV64-NEXT: vmulh.vv v27, v28, v27
+; LMULMAX1-RV64-NEXT: vadd.vv v26, v27, v26
+; LMULMAX1-RV64-NEXT: vsrl.vx v27, v26, a2
+; LMULMAX1-RV64-NEXT: vsra.vv v26, v26, v30
+; LMULMAX1-RV64-NEXT: vadd.vv v26, v26, v27
+; LMULMAX1-RV64-NEXT: vse64.v v26, (a0)
+; LMULMAX1-RV64-NEXT: vse64.v v25, (a1)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <4 x i64>, <4 x i64>* %x
+ %b = sdiv <4 x i64> %a, <i64 3, i64 -3, i64 3, i64 -3>
+ store <4 x i64> %b, <4 x i64>* %x
+ ret void
+}
+
define void @smin_v32i8(<32 x i8>* %x, <32 x i8>* %y) {
; LMULMAX2-LABEL: smin_v32i8:
; LMULMAX2: # %bb.0:
@@ -6212,3 +6961,264 @@ define void @urem_vx_v4i32(<4 x i32>* %x, i32 %y) {
store <4 x i32> %d, <4 x i32>* %x
ret void
}
+
+define void @mulhu_vx_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: mulhu_vx_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 16
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: addi a1, zero, 57
+; CHECK-NEXT: vmulhu.vx v25, v25, a1
+; CHECK-NEXT: vsrl.vi v25, v25, 1
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = udiv <16 x i8> %a, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
+ store <16 x i8> %b, <16 x i8>* %x
+ ret void
+}
+
+define void @mulhu_vx_v8i16(<8 x i16>* %x) {
+; LMULMAX1-RV32-LABEL: mulhu_vx_v8i16:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a1, zero, 8
+; LMULMAX1-RV32-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle16.v v25, (a0)
+; LMULMAX1-RV32-NEXT: lui a1, 2
+; LMULMAX1-RV32-NEXT: addi a1, a1, 1171
+; LMULMAX1-RV32-NEXT: vmulhu.vx v26, v25, a1
+; LMULMAX1-RV32-NEXT: vsub.vv v25, v25, v26
+; LMULMAX1-RV32-NEXT: vsrl.vi v25, v25, 1
+; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v26
+; LMULMAX1-RV32-NEXT: vsrl.vi v25, v25, 2
+; LMULMAX1-RV32-NEXT: vse16.v v25, (a0)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: mulhu_vx_v8i16:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a1, zero, 8
+; LMULMAX1-RV64-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vle16.v v25, (a0)
+; LMULMAX1-RV64-NEXT: lui a1, 2
+; LMULMAX1-RV64-NEXT: addiw a1, a1, 1171
+; LMULMAX1-RV64-NEXT: vmulhu.vx v26, v25, a1
+; LMULMAX1-RV64-NEXT: vsub.vv v25, v25, v26
+; LMULMAX1-RV64-NEXT: vsrl.vi v25, v25, 1
+; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v26
+; LMULMAX1-RV64-NEXT: vsrl.vi v25, v25, 2
+; LMULMAX1-RV64-NEXT: vse16.v v25, (a0)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = udiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+ store <8 x i16> %b, <8 x i16>* %x
+ ret void
+}
+
+define void @mulhu_vx_v4i32(<4 x i32>* %x) {
+; LMULMAX1-RV32-LABEL: mulhu_vx_v4i32:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a1, zero, 4
+; LMULMAX1-RV32-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle32.v v25, (a0)
+; LMULMAX1-RV32-NEXT: lui a1, 838861
+; LMULMAX1-RV32-NEXT: addi a1, a1, -819
+; LMULMAX1-RV32-NEXT: vmulhu.vx v25, v25, a1
+; LMULMAX1-RV32-NEXT: vsrl.vi v25, v25, 2
+; LMULMAX1-RV32-NEXT: vse32.v v25, (a0)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: mulhu_vx_v4i32:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a1, zero, 4
+; LMULMAX1-RV64-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vle32.v v25, (a0)
+; LMULMAX1-RV64-NEXT: lui a1, 838861
+; LMULMAX1-RV64-NEXT: addiw a1, a1, -819
+; LMULMAX1-RV64-NEXT: vmulhu.vx v25, v25, a1
+; LMULMAX1-RV64-NEXT: vsrl.vi v25, v25, 2
+; LMULMAX1-RV64-NEXT: vse32.v v25, (a0)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = udiv <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+ store <4 x i32> %b, <4 x i32>* %x
+ ret void
+}
+
+define void @mulhu_vx_v2i64(<2 x i64>* %x) {
+; LMULMAX1-RV32-LABEL: mulhu_vx_v2i64:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a1, zero, 2
+; LMULMAX1-RV32-NEXT: vsetvli a2, a1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
+; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI252_0)
+; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI252_0)
+; LMULMAX1-RV32-NEXT: addi a3, zero, 4
+; LMULMAX1-RV32-NEXT: vsetvli a4, a3, e32,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle32.v v26, (a2)
+; LMULMAX1-RV32-NEXT: vsetvli a2, a1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vmulhu.vv v25, v25, v26
+; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI252_1)
+; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI252_1)
+; LMULMAX1-RV32-NEXT: vsetvli a3, a3, e32,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle32.v v26, (a2)
+; LMULMAX1-RV32-NEXT: vsetvli a1, a1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v26
+; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: mulhu_vx_v2i64:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a1, zero, 2
+; LMULMAX1-RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
+; LMULMAX1-RV64-NEXT: lui a1, 1026731
+; LMULMAX1-RV64-NEXT: addiw a1, a1, -1365
+; LMULMAX1-RV64-NEXT: slli a1, a1, 12
+; LMULMAX1-RV64-NEXT: addi a1, a1, -1365
+; LMULMAX1-RV64-NEXT: slli a1, a1, 12
+; LMULMAX1-RV64-NEXT: addi a1, a1, -1365
+; LMULMAX1-RV64-NEXT: slli a1, a1, 12
+; LMULMAX1-RV64-NEXT: addi a1, a1, -1365
+; LMULMAX1-RV64-NEXT: vmulhu.vx v25, v25, a1
+; LMULMAX1-RV64-NEXT: vsrl.vi v25, v25, 1
+; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <2 x i64>, <2 x i64>* %x
+ %b = udiv <2 x i64> %a, <i64 3, i64 3>
+ store <2 x i64> %b, <2 x i64>* %x
+ ret void
+}
+
+define void @mulhs_vx_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: mulhs_vx_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 16
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: addi a1, zero, -123
+; CHECK-NEXT: vmulhu.vx v25, v25, a1
+; CHECK-NEXT: vsrl.vi v25, v25, 7
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = udiv <16 x i8> %a, <i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9>
+ store <16 x i8> %b, <16 x i8>* %x
+ ret void
+}
+
+define void @mulhs_vx_v8i16(<8 x i16>* %x) {
+; LMULMAX1-RV32-LABEL: mulhs_vx_v8i16:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a1, zero, 8
+; LMULMAX1-RV32-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle16.v v25, (a0)
+; LMULMAX1-RV32-NEXT: lui a1, 5
+; LMULMAX1-RV32-NEXT: addi a1, a1, -1755
+; LMULMAX1-RV32-NEXT: vmulh.vx v25, v25, a1
+; LMULMAX1-RV32-NEXT: vsra.vi v25, v25, 1
+; LMULMAX1-RV32-NEXT: vsrl.vi v26, v25, 15
+; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v26
+; LMULMAX1-RV32-NEXT: vse16.v v25, (a0)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: mulhs_vx_v8i16:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a1, zero, 8
+; LMULMAX1-RV64-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vle16.v v25, (a0)
+; LMULMAX1-RV64-NEXT: lui a1, 5
+; LMULMAX1-RV64-NEXT: addiw a1, a1, -1755
+; LMULMAX1-RV64-NEXT: vmulh.vx v25, v25, a1
+; LMULMAX1-RV64-NEXT: vsra.vi v25, v25, 1
+; LMULMAX1-RV64-NEXT: vsrl.vi v26, v25, 15
+; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v26
+; LMULMAX1-RV64-NEXT: vse16.v v25, (a0)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = sdiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+ store <8 x i16> %b, <8 x i16>* %x
+ ret void
+}
+
+define void @mulhs_vx_v4i32(<4 x i32>* %x) {
+; LMULMAX1-RV32-LABEL: mulhs_vx_v4i32:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a1, zero, 4
+; LMULMAX1-RV32-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle32.v v25, (a0)
+; LMULMAX1-RV32-NEXT: lui a1, 629146
+; LMULMAX1-RV32-NEXT: addi a1, a1, -1639
+; LMULMAX1-RV32-NEXT: vmulh.vx v25, v25, a1
+; LMULMAX1-RV32-NEXT: vsrl.vi v26, v25, 31
+; LMULMAX1-RV32-NEXT: vsra.vi v25, v25, 1
+; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v26
+; LMULMAX1-RV32-NEXT: vse32.v v25, (a0)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: mulhs_vx_v4i32:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a1, zero, 4
+; LMULMAX1-RV64-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vle32.v v25, (a0)
+; LMULMAX1-RV64-NEXT: lui a1, 629146
+; LMULMAX1-RV64-NEXT: addiw a1, a1, -1639
+; LMULMAX1-RV64-NEXT: vmulh.vx v25, v25, a1
+; LMULMAX1-RV64-NEXT: vsra.vi v25, v25, 1
+; LMULMAX1-RV64-NEXT: vsrl.vi v26, v25, 31
+; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v26
+; LMULMAX1-RV64-NEXT: vse32.v v25, (a0)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = sdiv <4 x i32> %a, <i32 -5, i32 -5, i32 -5, i32 -5>
+ store <4 x i32> %b, <4 x i32>* %x
+ ret void
+}
+
+define void @mulhs_vx_v2i64(<2 x i64>* %x) {
+; LMULMAX1-RV32-LABEL: mulhs_vx_v2i64:
+; LMULMAX1-RV32: # %bb.0:
+; LMULMAX1-RV32-NEXT: addi a1, zero, 2
+; LMULMAX1-RV32-NEXT: vsetvli a2, a1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
+; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI256_0)
+; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI256_0)
+; LMULMAX1-RV32-NEXT: addi a3, zero, 4
+; LMULMAX1-RV32-NEXT: vsetvli a4, a3, e32,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle32.v v26, (a2)
+; LMULMAX1-RV32-NEXT: vsetvli a2, a1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vmulh.vv v25, v25, v26
+; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI256_1)
+; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI256_1)
+; LMULMAX1-RV32-NEXT: vsetvli a3, a3, e32,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vle32.v v26, (a2)
+; LMULMAX1-RV32-NEXT: vsetvli a1, a1, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT: vsrl.vv v26, v25, v26
+; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v26
+; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
+; LMULMAX1-RV32-NEXT: ret
+;
+; LMULMAX1-RV64-LABEL: mulhs_vx_v2i64:
+; LMULMAX1-RV64: # %bb.0:
+; LMULMAX1-RV64-NEXT: addi a1, zero, 2
+; LMULMAX1-RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
+; LMULMAX1-RV64-NEXT: lui a1, 21845
+; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365
+; LMULMAX1-RV64-NEXT: slli a1, a1, 12
+; LMULMAX1-RV64-NEXT: addi a1, a1, 1365
+; LMULMAX1-RV64-NEXT: slli a1, a1, 12
+; LMULMAX1-RV64-NEXT: addi a1, a1, 1365
+; LMULMAX1-RV64-NEXT: slli a1, a1, 12
+; LMULMAX1-RV64-NEXT: addi a1, a1, 1366
+; LMULMAX1-RV64-NEXT: vmulh.vx v25, v25, a1
+; LMULMAX1-RV64-NEXT: addi a1, zero, 63
+; LMULMAX1-RV64-NEXT: vsrl.vx v26, v25, a1
+; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v26
+; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
+; LMULMAX1-RV64-NEXT: ret
+ %a = load <2 x i64>, <2 x i64>* %x
+ %b = sdiv <2 x i64> %a, <i64 3, i64 3>
+ store <2 x i64> %b, <2 x i64>* %x
+ ret void
+}