[llvm] [RISCV] Optimize divide by constant for VP intrinsics (PR #125991)
Jesse Huang via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 25 00:55:19 PST 2025
https://github.com/jaidTw updated https://github.com/llvm/llvm-project/pull/125991
>From 00095e1e42a7075dfc258808962de386e4f35a5d Mon Sep 17 00:00:00 2001
From: Yeting Kuo <46629943+fakepaper56 at users.noreply.github.com>
Date: Tue, 31 Jan 2023 09:52:06 +0800
Subject: [PATCH 01/10] [VP][RISCV] Add VP ISD opcodes VP_MULHU/VP_MULHS
Add ISD opcodes VP_MULHU/VP_MULHS, which can be used by VP optimizations.
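For context, these nodes mirror the per-lane semantics of the existing
MULHU/MULHS opcodes (the high half of the double-width product), plus the
usual VP mask and EVL operands. A minimal scalar sketch of that semantics,
for illustration only:

  #include <cstdint>
  #include <cstdio>

  // Scalar model of what MULHU/MULHS compute per lane; the VP forms apply
  // this under a mask and only up to the explicit vector length (EVL).
  uint32_t mulhu32(uint32_t a, uint32_t b) {
    return (uint32_t)(((uint64_t)a * b) >> 32); // high 32 bits, unsigned
  }
  int32_t mulhs32(int32_t a, int32_t b) {
    return (int32_t)(((int64_t)a * b) >> 32); // high 32 bits, signed
  }

  int main() {
    printf("%u\n", mulhu32(0xFFFFFFFFu, 3u)); // prints 2
    printf("%d\n", mulhs32(-2, 0x40000000));  // prints -1
  }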
---
llvm/include/llvm/IR/VPIntrinsics.def | 5 +++++
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 8 ++++----
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 5 +++++
3 files changed, 14 insertions(+), 4 deletions(-)
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index 55f4719da7c8b..e71ca44779adb 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -180,6 +180,11 @@ HELPER_REGISTER_BINARY_INT_VP(vp_xor, VP_XOR, Xor, XOR)
#undef HELPER_REGISTER_BINARY_INT_VP
+BEGIN_REGISTER_VP_SDNODE(VP_MULHU, -1, vp_mulhu, 2, 3)
+END_REGISTER_VP_SDNODE(VP_MULHU)
+BEGIN_REGISTER_VP_SDNODE(VP_MULHS, -1, vp_mulhs, 2, 3)
+END_REGISTER_VP_SDNODE(VP_MULHS)
+
// llvm.vp.smin(x,y,mask,vlen)
BEGIN_REGISTER_VP(vp_smin, 2, 3, VP_SMIN, -1)
VP_PROPERTY_BINARYOP
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 1000235ab4061..6e2f37d7c3dd4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1277,8 +1277,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::ADD: case ISD::VP_ADD:
case ISD::SUB: case ISD::VP_SUB:
case ISD::MUL: case ISD::VP_MUL:
- case ISD::MULHS:
- case ISD::MULHU:
+ case ISD::MULHS: case ISD::VP_MULHS:
+ case ISD::MULHU: case ISD::VP_MULHU:
case ISD::ABDS:
case ISD::ABDU:
case ISD::AVGCEILS:
@@ -4552,8 +4552,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::ADD: case ISD::VP_ADD:
case ISD::AND: case ISD::VP_AND:
case ISD::MUL: case ISD::VP_MUL:
- case ISD::MULHS:
- case ISD::MULHU:
+ case ISD::MULHS: case ISD::VP_MULHS:
+ case ISD::MULHU: case ISD::VP_MULHU:
case ISD::ABDS:
case ISD::ABDU:
case ISD::OR: case ISD::VP_OR:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8e3caf51d876b..3a4f1fefa9445 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -696,6 +696,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
+ ISD::VP_MULHU, ISD::VP_MULHS,
ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
@@ -6410,6 +6411,8 @@ static unsigned getRISCVVLOp(SDValue Op) {
VP_CASE(ADD) // VP_ADD
VP_CASE(SUB) // VP_SUB
VP_CASE(MUL) // VP_MUL
+ VP_CASE(MULHS) // VP_MULHS
+ VP_CASE(MULHU) // VP_MULHU
VP_CASE(SDIV) // VP_SDIV
VP_CASE(SREM) // VP_SREM
VP_CASE(UDIV) // VP_UDIV
@@ -7605,6 +7608,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::VP_ADD:
case ISD::VP_SUB:
case ISD::VP_MUL:
+ case ISD::VP_MULHS:
+ case ISD::VP_MULHU:
case ISD::VP_SDIV:
case ISD::VP_UDIV:
case ISD::VP_SREM:
>From bf0b608a2a52e8b6a9edf7cc725c85de7bd47858 Mon Sep 17 00:00:00 2001
From: Jesse Huang <jesse.huang at sifive.com>
Date: Fri, 10 Feb 2023 11:52:47 +0800
Subject: [PATCH 02/10] [LLVM][VP] Optimize divide by constant for VP
intrinsics
This patch implements divide-by-constant folds for vp.udiv/vp.sdiv and vp.urem/vp.srem, as well as some other minor folds such as division by a power of two, division by UINT_MAX, and division by INT_MIN.
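For reviewers unfamiliar with the transform, here is a scalar sketch of the
multiply-by-magic-number technique that BuildVPUDIV/BuildVPSDIV emit. This is
illustration only: the 32-bit magic constants for dividing by 5 and 7 are the
standard ones from Hacker's Delight, and signed >> is assumed to be an
arithmetic shift.

  #include <cassert>
  #include <cstdint>

  static uint32_t mulhu32(uint32_t a, uint32_t b) {
    return (uint32_t)(((uint64_t)a * b) >> 32);
  }
  static int32_t mulhs32(int32_t a, int32_t b) {
    return (int32_t)(((int64_t)a * b) >> 32);
  }

  // n / 5 unsigned: one mulhu plus a logical post-shift; no fixup needed.
  static uint32_t udiv5(uint32_t n) { return mulhu32(n, 0xCCCCCCCDu) >> 2; }

  // n / 7 unsigned: the magic overflows 32 bits, so add back part of the
  // numerator first (the UseNPQ path in BuildVPUDIV).
  static uint32_t udiv7(uint32_t n) {
    uint32_t q = mulhu32(n, 0x24924925u);
    return (((n - q) >> 1) + q) >> 2;
  }

  // n / 7 signed: mulhs, add the numerator (the Factor step), arithmetic
  // post-shift, then add the sign bit (the ShiftMask step).
  static int32_t sdiv7(int32_t n) {
    int32_t q = mulhs32(n, (int32_t)0x92492493u) + n;
    q >>= 2;
    return q + (int32_t)((uint32_t)q >> 31);
  }

  int main() {
    for (int64_t n = -(1 << 20); n < (1 << 20); ++n) {
      assert(udiv5((uint32_t)n) == (uint32_t)n / 5);
      assert(udiv7((uint32_t)n) == (uint32_t)n / 7);
      assert(sdiv7((int32_t)n) == (int32_t)n / 7);
    }
  }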
---
llvm/include/llvm/CodeGen/TargetLowering.h | 4 +
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 330 ++++
.../CodeGen/SelectionDAG/TargetLowering.cpp | 253 +++
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 10 +
llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll | 1540 +++++++++++++++++
5 files changed, 2137 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll
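Reviewer note: the signed power-of-two case in visitVPSDIVLike (below) avoids
the multiply entirely. A scalar sketch of that rounding trick, for
illustration only, assuming 1 <= k <= 31 and arithmetic signed shifts:

  #include <cassert>
  #include <cstdint>

  static int32_t sdiv_pow2(int32_t n, unsigned k) {
    int32_t sign = n >> 31;                               // all-ones if n < 0
    int32_t bias = (int32_t)((uint32_t)sign >> (32 - k)); // 2^k - 1 or 0
    return (n + bias) >> k; // biased shift rounds toward zero, as sdiv does
  }

  int main() {
    for (int32_t n = -1000; n <= 1000; ++n)
      for (unsigned k = 1; k <= 8; ++k)
        assert(sdiv_pow2(n, k) == n / (int32_t)(1u << k));
  }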
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 04ee24c0916e5..6447752c451d8 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5108,6 +5108,10 @@ class TargetLowering : public TargetLoweringBase {
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const;
+ SDValue BuildVPSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
+ SmallVectorImpl<SDNode *> &Created) const;
+ SDValue BuildVPUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
+ SmallVectorImpl<SDNode *> &Created) const;
/// Targets may override this function to provide custom SDIV lowering for
/// power-of-2 denominators. If the target returns an empty SDValue, LLVM
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8858c2012c706..74ab35f8c5f05 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -564,6 +564,14 @@ namespace {
SDValue visitFSUBForFMACombine(SDNode *N);
SDValue visitFMULForFMADistributiveCombine(SDNode *N);
+ SDValue visitVPUDIV(SDNode *N);
+ SDValue visitVPUDIVLike(SDValue N0, SDValue N1, SDNode *N);
+ SDValue BuildVPUDIV(SDNode *N);
+ SDValue visitVPSDIV(SDNode *N);
+ SDValue visitVPSDIVLike(SDValue N0, SDValue N1, SDNode *N);
+ SDValue BuildVPSDIV(SDNode *N);
+ SDValue visitVPREM(SDNode *N);
+
SDValue XformToShuffleWithZero(SDNode *N);
bool reassociationCanBreakAddressingModePattern(unsigned Opc,
const SDLoc &DL,
@@ -5161,6 +5169,59 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
return SDValue();
}
+// handles ISD::VP_SREM and ISD::VP_UREM
+SDValue DAGCombiner::visitVPREM(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue Mask = N->getOperand(2);
+ SDValue VL = N->getOperand(3);
+ EVT VT = N->getValueType(0);
+ EVT CCVT =
+ EVT::getVectorVT(*DAG.getContext(), MVT::i1, VT.getVectorElementCount());
+
+ bool IsSigned = (Opcode == ISD::VP_SREM);
+ SDLoc DL(N);
+
+ // fold (vp.urem X, -1) -> select(FX == -1, 0, FX)
+ // Freeze the numerator to avoid a miscompile with an undefined value.
+ if (!IsSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false)) {
+ SDValue F0 = DAG.getFreeze(N0);
+ SDValue EqualsNeg1 = DAG.getSetCCVP(DL, CCVT, F0, N1, ISD::SETEQ, Mask, VL);
+ return DAG.getNode(ISD::VP_SELECT, DL, VT, EqualsNeg1,
+ DAG.getConstant(0, DL, VT), F0, VL);
+ }
+
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+
+ // If X/C can be simplified by the division-by-constant logic, lower
+ // X%C to the equivalent of X-X/C*C.
+  // Reuse the VPSDIVLike/VPUDIVLike combines - to avoid mangling nodes, the
+ // speculative DIV must not cause a DIVREM conversion. We guard against this
+ // by skipping the simplification if isIntDivCheap(). When div is not cheap,
+ // combine will not return a DIVREM. Regardless, checking cheapness here
+ // makes sense since the simplification results in fatter code.
+ if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
+ SDValue OptimizedDiv =
+ IsSigned ? visitVPSDIVLike(N0, N1, N) : visitVPUDIVLike(N0, N1, N);
+ if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
+ // If the equivalent Div node also exists, update its users.
+ unsigned DivOpcode = IsSigned ? ISD::VP_SDIV : ISD::VP_UDIV;
+ if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
+ {N0, N1, Mask, VL}))
+ CombineTo(DivNode, OptimizedDiv);
+ SDValue Mul =
+ DAG.getNode(ISD::VP_MUL, DL, VT, OptimizedDiv, N1, Mask, VL);
+ SDValue Sub = DAG.getNode(ISD::VP_SUB, DL, VT, N0, Mul, Mask, VL);
+ AddToWorklist(OptimizedDiv.getNode());
+ AddToWorklist(Mul.getNode());
+ return Sub;
+ }
+ }
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitMULHS(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -27219,6 +27280,268 @@ SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
return SDValue();
}
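+/// Given an ISD::VP_UDIV node expressing a divide by constant, return
+/// a DAG expression that will generate the same value by multiplying
+/// by a magic number.
+/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".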
+SDValue DAGCombiner::BuildVPUDIV(SDNode *N) {
+  // When optimizing for minimum size, we don't want to expand a div to a mul
+ // and a shift.
+ if (DAG.getMachineFunction().getFunction().hasMinSize())
+ return SDValue();
+
+ SmallVector<SDNode *, 8> Built;
+ if (SDValue S = TLI.BuildVPUDIV(N, DAG, LegalOperations, Built)) {
+ for (SDNode *N : Built)
+ AddToWorklist(N);
+ return S;
+ }
+
+ return SDValue();
+}
+
+/// Given an ISD::VP_SDIV node expressing a divide by constant, return
+/// a DAG expression to select that will generate the same value by multiplying
+/// by a magic number.
+/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
+SDValue DAGCombiner::BuildVPSDIV(SDNode *N) {
+ // when optimising for minimum size, we don't want to expand a div to a mul
+ // and a shift.
+ if (DAG.getMachineFunction().getFunction().hasMinSize())
+ return SDValue();
+
+ SmallVector<SDNode *, 8> Built;
+ if (SDValue S = TLI.BuildVPSDIV(N, DAG, LegalOperations, Built)) {
+ for (SDNode *N : Built)
+ AddToWorklist(N);
+ return S;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitVPUDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue Mask = N->getOperand(2);
+ SDValue VL = N->getOperand(3);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ // fold (vp.udiv X, -1) -> vp.select(X == -1, 1, 0)
+ if (N1C && N1C->isAllOnes()) {
+ EVT CCVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ VT.getVectorElementCount());
+ return DAG.getNode(ISD::VP_SELECT, DL, VT,
+ DAG.getSetCCVP(DL, CCVT, N0, N1, ISD::SETEQ, Mask, VL),
+ DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT),
+ VL);
+ }
+
+ if (SDValue V = visitVPUDIVLike(N0, N1, N)) {
+ // If the corresponding remainder node exists, update its users with
+    // (Dividend - (Quotient * Divisor)).
+ if (SDNode *RemNode = DAG.getNodeIfExists(ISD::VP_UREM, N->getVTList(),
+ {N0, N1, Mask, VL})) {
+ SDValue Mul = DAG.getNode(ISD::VP_MUL, DL, VT, V, N1, Mask, VL);
+ SDValue Sub = DAG.getNode(ISD::VP_SUB, DL, VT, N0, Mul, Mask, VL);
+ AddToWorklist(Mul.getNode());
+ AddToWorklist(Sub.getNode());
+ CombineTo(RemNode, Sub);
+ }
+ return V;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitVPUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
+ SDLoc DL(N);
+ SDValue Mask = N->getOperand(2);
+ SDValue VL = N->getOperand(3);
+ EVT VT = N->getValueType(0);
+
+ // fold (vp.udiv x, (1 << c)) -> vp.lshr(x, c)
+ if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
+ DAG.isKnownToBeAPowerOfTwo(N1)) {
+ SDValue LogBase2 = BuildLogBase2(N1, DL);
+ AddToWorklist(LogBase2.getNode());
+
+ EVT ShiftVT = getShiftAmountTy(N0.getValueType());
+ SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
+ AddToWorklist(Trunc.getNode());
+ return DAG.getNode(ISD::VP_SRL, DL, VT, N0, Trunc, Mask, VL);
+ }
+
+  // fold (vp.udiv x, (vp.shl c, y)) -> vp.lshr(x, vp.add(y, log2(c))) iff c
+  // is a power of 2
+ if (N1.getOpcode() == ISD::VP_SHL && N1->getOperand(2) == Mask &&
+ N1->getOperand(3) == VL) {
+ SDValue N10 = N1.getOperand(0);
+ if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
+ DAG.isKnownToBeAPowerOfTwo(N10)) {
+ SDValue LogBase2 = BuildLogBase2(N10, DL);
+ AddToWorklist(LogBase2.getNode());
+
+ EVT ADDVT = N1.getOperand(1).getValueType();
+ SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
+ AddToWorklist(Trunc.getNode());
+ SDValue Add = DAG.getNode(ISD::VP_ADD, DL, ADDVT, N1.getOperand(1), Trunc,
+ Mask, VL);
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::VP_SRL, DL, VT, N0, Add, Mask, VL);
+ }
+ }
+
+  // fold (vp.udiv x, splat(shl c, y)) -> vp.lshr(x, splat(add(y, log2(c))))
+  // iff c is a power of 2
+ if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == ISD::SHL) {
+ SDValue N0SHL = N10.getOperand(0);
+ if (isa<ConstantSDNode>(N0SHL) && DAG.isKnownToBeAPowerOfTwo(N0SHL)) {
+ SDValue LogBase2 = BuildLogBase2(N0SHL, DL);
+ AddToWorklist(LogBase2.getNode());
+
+ EVT ADDVT = N10.getOperand(1).getValueType();
+ SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
+ AddToWorklist(Trunc.getNode());
+ SDValue Add =
+ DAG.getNode(ISD::ADD, DL, ADDVT, N10.getOperand(1), Trunc);
+ AddToWorklist(Add.getNode());
+ SDValue Splat = DAG.getSplatVector(VT, DL, Add);
+ AddToWorklist(Splat.getNode());
+ return DAG.getNode(ISD::VP_SRL, DL, VT, N0, Splat, Mask, VL);
+ }
+ }
+ }
+
+  // fold (vp.udiv x, c) -> alternate computation
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+ if (isConstantOrConstantVector(N1) &&
+ !TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue Op = BuildVPUDIV(N))
+ return Op;
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitVPSDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue Mask = N->getOperand(2);
+ SDValue VL = N->getOperand(3);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // fold (vp.sdiv X, -1) -> 0-X
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ if (N1C && N1C->isAllOnes())
+ return DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT), N0,
+ Mask, VL);
+
+ // fold (vp.sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
+ if (N1C && N1C->getAPIntValue().isMinSignedValue()) {
+ EVT CCVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ VT.getVectorElementCount());
+ return DAG.getNode(ISD::VP_SELECT, DL, VT,
+ DAG.getSetCCVP(DL, CCVT, N0, N1, ISD::SETEQ, Mask, VL),
+ DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT),
+ VL);
+ }
+
+ // If we know the sign bits of both operands are zero, strength reduce to a
+ // vp.udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::VP_UDIV, DL, N1.getValueType(), N0, N1, Mask, VL);
+
+ if (SDValue V = visitVPSDIVLike(N0, N1, N)) {
+ // If the corresponding remainder node exists, update its users with
+    // (Dividend - (Quotient * Divisor)).
+ if (SDNode *RemNode = DAG.getNodeIfExists(ISD::VP_SREM, N->getVTList(),
+ {N0, N1, Mask, VL})) {
+ SDValue Mul = DAG.getNode(ISD::VP_MUL, DL, VT, V, N1, Mask, VL);
+ SDValue Sub = DAG.getNode(ISD::VP_SUB, DL, VT, N0, Mul, Mask, VL);
+ AddToWorklist(Mul.getNode());
+ AddToWorklist(Sub.getNode());
+ CombineTo(RemNode, Sub);
+ }
+ return V;
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitVPSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
+ SDLoc DL(N);
+ SDValue Mask = N->getOperand(2);
+ SDValue VL = N->getOperand(3);
+ EVT VT = N->getValueType(0);
+ unsigned BitWidth = VT.getScalarSizeInBits();
+
+  // fold (vp.sdiv X, splat of a power of 2) into a shift sequence
+ if (N1.getOpcode() == ISD::SPLAT_VECTOR &&
+ isDivisorPowerOfTwo(N1.getOperand(0))) {
+ // Create constants that are functions of the shift amount value.
+    SDValue Divisor = N1.getOperand(0);
+ EVT CCVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ VT.getVectorElementCount());
+ EVT ScalarShiftAmtTy =
+ getShiftAmountTy(N0.getValueType().getVectorElementType());
+ SDValue Bits = DAG.getConstant(BitWidth, DL, ScalarShiftAmtTy);
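+    // C1 = log2(|Divisor|); shifting the sign splat right by
+    // Inexact = BitWidth - C1 produces the rounding bias (2^C1 - 1).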
+    SDValue C1 =
+        DAG.getNode(ISD::CTTZ, DL, VT.getVectorElementType(), Divisor);
+ C1 = DAG.getZExtOrTrunc(C1, DL, ScalarShiftAmtTy);
+ SDValue Inexact = DAG.getNode(ISD::SUB, DL, ScalarShiftAmtTy, Bits, C1);
+ if (!isa<ConstantSDNode>(Inexact))
+ return SDValue();
+
+ // Splat the sign bit into the register
+ EVT VecShiftAmtTy = EVT::getVectorVT(*DAG.getContext(), ScalarShiftAmtTy,
+ VT.getVectorElementCount());
+ SDValue Sign =
+ DAG.getNode(ISD::VP_SRA, DL, VT, N0,
+ DAG.getConstant(BitWidth - 1, DL, VecShiftAmtTy), Mask, VL);
+ AddToWorklist(Sign.getNode());
+
+ // Add N0, ((N0 < 0) ? abs(N1) - 1 : 0);
+ Inexact = DAG.getSplat(VT, DL, Inexact);
+ C1 = DAG.getSplat(VT, DL, C1);
+ SDValue Srl = DAG.getNode(ISD::VP_SRL, DL, VT, Sign, Inexact, Mask, VL);
+ AddToWorklist(Srl.getNode());
+ SDValue Add = DAG.getNode(ISD::VP_ADD, DL, VT, N0, Srl, Mask, VL);
+ AddToWorklist(Add.getNode());
+ SDValue Sra = DAG.getNode(ISD::VP_SRA, DL, VT, Add, C1, Mask, VL);
+ AddToWorklist(Sra.getNode());
+
+ // Special case: (sdiv X, 1) -> X
+    // Special case: (sdiv X, -1) -> 0-X
+ SDValue One = DAG.getConstant(1, DL, VT);
+ SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
+ SDValue IsOne = DAG.getSetCCVP(DL, CCVT, N1, One, ISD::SETEQ, Mask, VL);
+ SDValue IsAllOnes =
+ DAG.getSetCCVP(DL, CCVT, N1, AllOnes, ISD::SETEQ, Mask, VL);
+ SDValue IsOneOrAllOnes =
+ DAG.getNode(ISD::VP_OR, DL, CCVT, IsOne, IsAllOnes, Mask, VL);
+ Sra = DAG.getNode(ISD::VP_SELECT, DL, VT, IsOneOrAllOnes, N0, Sra, VL);
+
+ // If dividing by a positive value, we're done. Otherwise, the result must
+ // be negated.
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue Sub = DAG.getNode(ISD::VP_SUB, DL, VT, Zero, Sra, Mask, VL);
+
+ // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
+ SDValue IsNeg = DAG.getSetCCVP(DL, CCVT, N1, Zero, ISD::SETLT, Mask, VL);
+ SDValue Res = DAG.getNode(ISD::VP_SELECT, DL, VT, IsNeg, Sub, Sra, VL);
+ return Res;
+ }
+
+ // If integer divide is expensive and we satisfy the requirements, emit an
+ // alternate sequence. Targets may check function attributes for size/speed
+ // trade-offs.
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+ if (isConstantOrConstantVector(N1) &&
+ !TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue Op = BuildVPSDIV(N))
+ return Op;
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitVPOp(SDNode *N) {
if (N->getOpcode() == ISD::VP_GATHER)
@@ -27262,6 +27585,13 @@ SDValue DAGCombiner::visitVPOp(SDNode *N) {
return visitMUL<VPMatchContext>(N);
case ISD::VP_SUB:
return foldSubCtlzNot<VPMatchContext>(N, DAG);
+ case ISD::VP_UDIV:
+ return visitVPUDIV(N);
+ case ISD::VP_SDIV:
+ return visitVPSDIV(N);
+ case ISD::VP_UREM:
+ case ISD::VP_SREM:
+ return visitVPREM(N);
default:
break;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index adfb96041c5c0..82a2500ff386d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6492,6 +6492,121 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
+/// Given an ISD::VP_SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.
+/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
+SDValue TargetLowering::BuildVPSDIV(SDNode *N, SelectionDAG &DAG,
+ bool IsAfterLegalization,
+ SmallVectorImpl<SDNode *> &Created) const {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ EVT SVT = VT.getScalarType();
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ EVT ShSVT = ShVT.getScalarType();
+ unsigned EltBits = VT.getScalarSizeInBits();
+
+ // Check to see if we can do this.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
+
+ auto BuildSDIVPattern = [&](ConstantSDNode *C) {
+ if (C->isZero())
+ return false;
+
+ const APInt &Divisor = C->getAPIntValue();
+ SignedDivisionByConstantInfo magics =
+ SignedDivisionByConstantInfo::get(Divisor);
+ int NumeratorFactor = 0;
+ int ShiftMask = -1;
+
+ if (Divisor.isOne() || Divisor.isAllOnes()) {
+ // If d is +1/-1, we just multiply the numerator by +1/-1.
+ NumeratorFactor = Divisor.getSExtValue();
+ magics.Magic = 0;
+ magics.ShiftAmount = 0;
+ ShiftMask = 0;
+ } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
+ // If d > 0 and m < 0, add the numerator.
+ NumeratorFactor = 1;
+ } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
+ // If d < 0 and m > 0, subtract the numerator.
+ NumeratorFactor = -1;
+ }
+
+ MagicFactors.push_back(DAG.getConstant(magics.Magic, DL, SVT));
+ Factors.push_back(DAG.getSignedConstant(NumeratorFactor, DL, SVT));
+ Shifts.push_back(DAG.getConstant(magics.ShiftAmount, DL, ShSVT));
+ ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, DL, SVT));
+ return true;
+ };
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue Mask = N->getOperand(2);
+ SDValue VL = N->getOperand(3);
+
+ // Collect the shifts / magic values from each element.
+ if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
+ return SDValue();
+
+ SDValue MagicFactor, Factor, Shift, ShiftMask;
+ if (N1.getOpcode() == ISD::BUILD_VECTOR) {
+ MagicFactor = DAG.getBuildVector(VT, DL, MagicFactors);
+ Factor = DAG.getBuildVector(VT, DL, Factors);
+ Shift = DAG.getBuildVector(ShVT, DL, Shifts);
+ ShiftMask = DAG.getBuildVector(VT, DL, ShiftMasks);
+ } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
+ assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
+ Shifts.size() == 1 && ShiftMasks.size() == 1 &&
+ "Expected matchUnaryPredicate to return one element for scalable "
+ "vectors");
+ MagicFactor = DAG.getSplatVector(VT, DL, MagicFactors[0]);
+ Factor = DAG.getSplatVector(VT, DL, Factors[0]);
+ Shift = DAG.getSplatVector(ShVT, DL, Shifts[0]);
+ ShiftMask = DAG.getSplatVector(VT, DL, ShiftMasks[0]);
+ } else {
+ assert(isa<ConstantSDNode>(N1) && "Expected a constant");
+ MagicFactor = MagicFactors[0];
+ Factor = Factors[0];
+ Shift = Shifts[0];
+ ShiftMask = ShiftMasks[0];
+ }
+
+ // Multiply the numerator (operand 0) by the magic value.
+ auto GetMULHS = [&](SDValue X, SDValue Y) {
+ if (isOperationLegalOrCustom(ISD::VP_MULHS, VT, IsAfterLegalization))
+ return DAG.getNode(ISD::VP_MULHS, DL, VT, X, Y, Mask, VL);
+ return SDValue();
+ };
+
+ SDValue Q = GetMULHS(N0, MagicFactor);
+ if (!Q)
+ return SDValue();
+
+ Created.push_back(Q.getNode());
+
+ // (Optionally) Add/subtract the numerator using Factor.
+ Factor = DAG.getNode(ISD::VP_MUL, DL, VT, N0, Factor, Mask, VL);
+ Created.push_back(Factor.getNode());
+ Q = DAG.getNode(ISD::VP_ADD, DL, VT, Q, Factor, Mask, VL);
+ Created.push_back(Q.getNode());
+
+ // Shift right algebraic by shift value.
+ Q = DAG.getNode(ISD::VP_SRA, DL, VT, Q, Shift, Mask, VL);
+ Created.push_back(Q.getNode());
+
+ // Extract the sign bit, mask it and add it to the quotient.
+ SDValue SignShift = DAG.getConstant(EltBits - 1, DL, ShVT);
+ SDValue T = DAG.getNode(ISD::VP_SRL, DL, VT, Q, SignShift, Mask, VL);
+ Created.push_back(T.getNode());
+ T = DAG.getNode(ISD::VP_AND, DL, VT, T, ShiftMask, Mask, VL);
+ Created.push_back(T.getNode());
+ return DAG.getNode(ISD::VP_ADD, DL, VT, Q, T, Mask, VL);
+}
+
/// Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
@@ -6692,6 +6807,144 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
+/// Given an ISD::VP_UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.
+/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
+SDValue TargetLowering::BuildVPUDIV(SDNode *N, SelectionDAG &DAG,
+ bool IsAfterLegalization,
+ SmallVectorImpl<SDNode *> &Created) const {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ EVT SVT = VT.getScalarType();
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ EVT ShSVT = ShVT.getScalarType();
+ unsigned EltBits = VT.getScalarSizeInBits();
+
+ // Check to see if we can do this.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
+
+ SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
+
+ auto BuildUDIVPattern = [&](ConstantSDNode *C) {
+ if (C->isZero())
+ return false;
+ // FIXME: We should use a narrower constant when the upper
+ // bits are known to be zero.
+ const APInt &Divisor = C->getAPIntValue();
+ SDValue PreShift, MagicFactor, NPQFactor, PostShift;
+
+ // Magic algorithm doesn't work for division by 1. We need to emit a select
+ // at the end.
+ if (Divisor.isOne()) {
+ PreShift = PostShift = DAG.getUNDEF(ShSVT);
+ MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
+ } else {
+ UnsignedDivisionByConstantInfo magics =
+ UnsignedDivisionByConstantInfo::get(Divisor);
+
+ MagicFactor = DAG.getConstant(magics.Magic, DL, SVT);
+
+ assert(magics.PreShift < Divisor.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ assert(magics.PostShift < Divisor.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
+ PreShift = DAG.getConstant(magics.PreShift, DL, ShSVT);
+ PostShift = DAG.getConstant(magics.PostShift, DL, ShSVT);
+ NPQFactor = DAG.getConstant(
+ magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
+ : APInt::getZero(EltBits),
+ DL, SVT);
+ UseNPQ |= magics.IsAdd;
+ UsePreShift |= magics.PreShift != 0;
+ UsePostShift |= magics.PostShift != 0;
+ }
+
+ PreShifts.push_back(PreShift);
+ MagicFactors.push_back(MagicFactor);
+ NPQFactors.push_back(NPQFactor);
+ PostShifts.push_back(PostShift);
+ return true;
+ };
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue Mask = N->getOperand(2);
+ SDValue VL = N->getOperand(3);
+
+ // Collect the shifts/magic values from each element.
+ if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
+ return SDValue();
+
+ SDValue PreShift, PostShift, MagicFactor, NPQFactor;
+ if (N1.getOpcode() == ISD::BUILD_VECTOR) {
+ PreShift = DAG.getBuildVector(ShVT, DL, PreShifts);
+ MagicFactor = DAG.getBuildVector(VT, DL, MagicFactors);
+ NPQFactor = DAG.getBuildVector(VT, DL, NPQFactors);
+ PostShift = DAG.getBuildVector(ShVT, DL, PostShifts);
+ } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
+ assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
+ NPQFactors.size() == 1 && PostShifts.size() == 1 &&
+ "Expected matchUnaryPredicate to return one for scalable vectors");
+ PreShift = DAG.getSplatVector(ShVT, DL, PreShifts[0]);
+ MagicFactor = DAG.getSplatVector(VT, DL, MagicFactors[0]);
+ NPQFactor = DAG.getSplatVector(VT, DL, NPQFactors[0]);
+ PostShift = DAG.getSplatVector(ShVT, DL, PostShifts[0]);
+ } else {
+ assert(isa<ConstantSDNode>(N1) && "Expected a constant");
+ PreShift = PreShifts[0];
+ MagicFactor = MagicFactors[0];
+    NPQFactor = NPQFactors[0];
+    PostShift = PostShifts[0];
+ }
+
+ SDValue Q = N0;
+ if (UsePreShift) {
+ Q = DAG.getNode(ISD::VP_SRL, DL, VT, Q, PreShift, Mask, VL);
+ Created.push_back(Q.getNode());
+ }
+
+ auto GetMULHU = [&](SDValue X, SDValue Y) {
+ if (isOperationLegalOrCustom(ISD::VP_MULHU, VT, IsAfterLegalization))
+ return DAG.getNode(ISD::VP_MULHU, DL, VT, X, Y, Mask, VL);
+ return SDValue();
+ };
+
+ // Multiply the numerator (operand 0) by the magic value.
+ Q = GetMULHU(Q, MagicFactor);
+ if (!Q)
+ return SDValue();
+
+ Created.push_back(Q.getNode());
+
+ if (UseNPQ) {
+ SDValue NPQ = DAG.getNode(ISD::VP_SUB, DL, VT, N0, Q, Mask, VL);
+ Created.push_back(NPQ.getNode());
+
+ // For vectors we might have a mix of non-NPQ/NPQ paths, so use
+ // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
+ NPQ = GetMULHU(NPQ, NPQFactor);
+ Created.push_back(NPQ.getNode());
+
+ Q = DAG.getNode(ISD::VP_ADD, DL, VT, NPQ, Q, Mask, VL);
+ Created.push_back(Q.getNode());
+ }
+
+ if (UsePostShift) {
+ Q = DAG.getNode(ISD::VP_SRL, DL, VT, Q, PostShift, Mask, VL);
+ Created.push_back(Q.getNode());
+ }
+
+ EVT SetCCVT =
+ EVT::getVectorVT(*DAG.getContext(), MVT::i1, VT.getVectorElementCount());
+ SDValue One = DAG.getConstant(1, DL, VT);
+ SDValue IsOne = DAG.getSetCCVP(DL, SetCCVT, N1, One, ISD::SETEQ, Mask, VL);
+ return DAG.getNode(ISD::VP_SELECT, DL, VT, IsOne, N0, Q, VL);
+}
+
/// If all values in Values that *don't* match the predicate are same 'splat'
/// value, then replace all values with that splat value.
/// Else, if AlternativeReplacement was provided, then replace all values that
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 3a4f1fefa9445..72b6ba0c2d8ce 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -871,6 +871,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(IntegerVPOps, VT, Custom);
+ // Zve64* does not support VP_MULHU/S with nxvXi64.
+ if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV()) {
+ setOperationAction({ISD::VP_MULHU, ISD::VP_MULHS}, VT, Expand);
+ }
+
setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
@@ -1300,6 +1305,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(IntegerVPOps, VT, Custom);
+ // Zve64* does not support VP_MULHU/S with nxvXi64.
+ if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV()) {
+ setOperationAction({ISD::VP_MULHU, ISD::VP_MULHS}, VT, Expand);
+ }
+
if (Subtarget.hasStdExtZvkb())
setOperationAction({ISD::BSWAP, ISD::ROTL, ISD::ROTR}, VT, Custom);
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll
new file mode 100644
index 0000000000000..f78a0ec7f2378
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll
@@ -0,0 +1,1540 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s
+
+declare <vscale x 8 x i8> @llvm.vp.udiv.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
+declare <vscale x 4 x i16> @llvm.vp.udiv.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
+declare <vscale x 2 x i32> @llvm.vp.udiv.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
+declare <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
+declare <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 8 x i8> @llvm.vp.urem.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
+declare <vscale x 4 x i16> @llvm.vp.urem.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
+declare <vscale x 2 x i32> @llvm.vp.urem.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 8 x i8> @llvm.vp.srem.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
+declare <vscale x 4 x i16> @llvm.vp.srem.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
+declare <vscale x 2 x i32> @llvm.vp.srem.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 8 x i8> @llvm.vp.shl.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
+declare <vscale x 4 x i16> @llvm.vp.shl.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
+declare <vscale x 2 x i32> @llvm.vp.shl.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.shl.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+
+
+define <vscale x 8 x i8> @vpudiv_by_max_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudiv_by_max_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vmseq.vi v0, v8, -1, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 8 x i8> undef, i8 255, i32 0
+ %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.udiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 4 x i16> @vpudiv_by_max_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudiv_by_max_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmseq.vi v0, v8, -1, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 4 x i16> undef, i16 65535, i32 0
+ %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.udiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 2 x i32> @vpudiv_by_max_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudiv_by_max_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmseq.vi v0, v8, -1, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 2 x i32> undef, i32 4294967295, i32 0
+ %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.udiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 1 x i64> @vpudiv_by_max_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudiv_by_max_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vmseq.vi v0, v8, -1, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 1 x i64> undef, i64 18446744073709551615, i32 0
+ %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 8 x i8> @fold_vpudiv_vpurem_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fold_vpudiv_vpurem_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 7, v0.t
+; CHECK-NEXT: li a0, -128
+; CHECK-NEXT: vmul.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 8 x i8> undef, i8 128, i32 0
+ %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.udiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ %u = call <vscale x 8 x i8> @llvm.vp.urem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ %x = add <vscale x 8 x i8> %v, %u
+ ret <vscale x 8 x i8> %x
+}
+
+define <vscale x 4 x i16> @fold_vpudiv_vpurem_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fold_vpudiv_vpurem_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 14, v0.t
+; CHECK-NEXT: lui a0, 4
+; CHECK-NEXT: vmul.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 4 x i16> undef, i16 16384, i32 0
+ %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.udiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ %u = call <vscale x 4 x i16> @llvm.vp.urem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ %x = add <vscale x 4 x i16> %v, %u
+ ret <vscale x 4 x i16> %x
+}
+
+define <vscale x 2 x i32> @fold_vpudiv_vpurem_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fold_vpudiv_vpurem_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 14, v0.t
+; CHECK-NEXT: lui a0, 4
+; CHECK-NEXT: vmul.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 2 x i32> undef, i32 16384, i32 0
+ %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.udiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ %u = call <vscale x 2 x i32> @llvm.vp.urem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ %x = add <vscale x 2 x i32> %v, %u
+ ret <vscale x 2 x i32> %x
+}
+
+define <vscale x 1 x i64> @fold_vpudiv_vpurem_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fold_vpudiv_vpurem_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 14, v0.t
+; CHECK-NEXT: lui a0, 4
+; CHECK-NEXT: vmul.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vadd.vv v8, v9, v8
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 1 x i64> undef, i64 16384, i32 0
+ %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %u = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %x = add <vscale x 1 x i64> %v, %u
+ ret <vscale x 1 x i64> %x
+}
+
+define <vscale x 8 x i8> @vpudiv_by_shl2_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudiv_by_shl2_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andi a0, a0, 255
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %sh = shl i8 2, %b
+ %vec = insertelement <vscale x 8 x i8> undef, i8 %sh, i32 0
+ %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.udiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 4 x i16> @vpudiv_by_shl2_nxv4i16(<vscale x 4 x i16> %va, i16 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudiv_by_shl2_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a0, a0, 48
+; CHECK-NEXT: srli a0, a0, 48
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %sh = shl i16 2, %b
+ %vec = insertelement <vscale x 4 x i16> undef, i16 %sh, i32 0
+ %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.udiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 2 x i32> @vpudiv_by_shl2_nxv2i32(<vscale x 2 x i32> %va, i32 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudiv_by_shl2_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a0, a0, 32
+; CHECK-NEXT: srli a0, a0, 32
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %sh = shl i32 2, %b
+ %vec = insertelement <vscale x 2 x i32> undef, i32 %sh, i32 0
+ %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.udiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 1 x i64> @vpudiv_by_shl2_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudiv_by_shl2_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %sh = shl i64 2, %b
+ %vec = insertelement <vscale x 1 x i64> undef, i64 %sh, i32 0
+ %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 8 x i8> @vpudiv_by_vpshl2_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudiv_by_vpshl2_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vadd.vi v9, v9, 2, v0.t
+; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %vec1 = insertelement <vscale x 8 x i8> undef, i8 4, i32 0
+ %splat1 = shufflevector <vscale x 8 x i8> %vec1, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %vec2 = insertelement <vscale x 8 x i8> undef, i8 %b, i32 0
+ %splat2 = shufflevector <vscale x 8 x i8> %vec2, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %sh = call <vscale x 8 x i8> @llvm.vp.shl.nxv8i8(<vscale x 8 x i8> %splat1, <vscale x 8 x i8> %splat2, <vscale x 8 x i1> %m, i32 %evl)
+ %v = call <vscale x 8 x i8> @llvm.vp.udiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %sh, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 4 x i16> @vpudiv_by_vpshl2_nxv4i16(<vscale x 4 x i16> %va, i16 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudiv_by_vpshl2_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vadd.vi v9, v9, 2, v0.t
+; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %vec1 = insertelement <vscale x 4 x i16> undef, i16 4, i32 0
+ %splat1 = shufflevector <vscale x 4 x i16> %vec1, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %vec2 = insertelement <vscale x 4 x i16> undef, i16 %b, i32 0
+ %splat2 = shufflevector <vscale x 4 x i16> %vec2, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %sh = call <vscale x 4 x i16> @llvm.vp.shl.nxv4i16(<vscale x 4 x i16> %splat1, <vscale x 4 x i16> %splat2, <vscale x 4 x i1> %m, i32 %evl)
+ %v = call <vscale x 4 x i16> @llvm.vp.udiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %sh, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 2 x i32> @vpudiv_by_vpshl2_nxv2i32(<vscale x 2 x i32> %va, i32 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudiv_by_vpshl2_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vadd.vi v9, v9, 2, v0.t
+; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %vec1 = insertelement <vscale x 2 x i32> undef, i32 4, i32 0
+ %splat1 = shufflevector <vscale x 2 x i32> %vec1, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %vec2 = insertelement <vscale x 2 x i32> undef, i32 %b, i32 0
+ %splat2 = shufflevector <vscale x 2 x i32> %vec2, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %sh = call <vscale x 2 x i32> @llvm.vp.shl.nxv2i32(<vscale x 2 x i32> %splat1, <vscale x 2 x i32> %splat2, <vscale x 2 x i1> %m, i32 %evl)
+ %v = call <vscale x 2 x i32> @llvm.vp.udiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %sh, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 1 x i64> @vpudiv_by_vpshl2_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudiv_by_vpshl2_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT: vadd.vi v9, v9, 2, v0.t
+; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %vec1 = insertelement <vscale x 1 x i64> undef, i64 4, i32 0
+ %splat1 = shufflevector <vscale x 1 x i64> %vec1, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %vec2 = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
+ %splat2 = shufflevector <vscale x 1 x i64> %vec2, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %sh = call <vscale x 1 x i64> @llvm.vp.shl.nxv1i64(<vscale x 1 x i64> %splat1, <vscale x 1 x i64> %splat2, <vscale x 1 x i1> %m, i32 %evl)
+ %v = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %sh, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 8 x i8> @vpudiv_by_const_no_add_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudiv_by_const_no_add_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 5
+; CHECK-NEXT: li a1, -51
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t
+; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 8 x i8> undef, i8 5, i32 0
+ %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.udiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 4 x i16> @vpudiv_by_const_no_add_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudiv_by_const_no_add_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 5
+; CHECK-NEXT: lui a1, 1048573
+; CHECK-NEXT: addiw a1, a1, -819
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t
+; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 4 x i16> undef, i16 5, i32 0
+ %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.udiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 2 x i32> @vpudiv_by_const_no_add_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudiv_by_const_no_add_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 5
+; CHECK-NEXT: lui a1, 838861
+; CHECK-NEXT: addiw a1, a1, -819
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t
+; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 2 x i32> undef, i32 5, i32 0
+ %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.udiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 1 x i64> @vpudiv_by_const_no_add_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudiv_by_const_no_add_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, %hi(.LCPI19_0)
+; CHECK-NEXT: ld a1, %lo(.LCPI19_0)(a1)
+; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 5
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t
+; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 1 x i64> undef, i64 5, i32 0
+ %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 8 x i8> @vpudiv_by_const_with_add_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudiv_by_const_with_add_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 7
+; CHECK-NEXT: li a1, 37
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t
+; CHECK-NEXT: li a0, -128
+; CHECK-NEXT: vmulhu.vx v11, v11, a0, v0.t
+; CHECK-NEXT: vadd.vv v10, v11, v10, v0.t
+; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t
+; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 8 x i8> undef, i8 7, i32 0
+ %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.udiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 4 x i16> @vpudiv_by_const_with_add_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudiv_by_const_with_add_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 7
+; CHECK-NEXT: lui a1, 2
+; CHECK-NEXT: addiw a1, a1, 1171
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 1048568
+; CHECK-NEXT: vmulhu.vx v11, v11, a0, v0.t
+; CHECK-NEXT: vadd.vv v10, v11, v10, v0.t
+; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t
+; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 4 x i16> undef, i16 7, i32 0
+ %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.udiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 2 x i32> @vpudiv_by_const_with_add_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudiv_by_const_with_add_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 7
+; CHECK-NEXT: lui a1, 149797
+; CHECK-NEXT: addiw a1, a1, -1755
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t
+; CHECK-NEXT: lui a0, 524288
+; CHECK-NEXT: vmulhu.vx v11, v11, a0, v0.t
+; CHECK-NEXT: vadd.vv v10, v11, v10, v0.t
+; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t
+; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 2 x i32> undef, i32 7, i32 0
+ %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.udiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 1 x i64> @vpudiv_by_const_with_add_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudiv_by_const_with_add_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, %hi(.LCPI23_0)
+; CHECK-NEXT: ld a1, %lo(.LCPI23_0)(a1)
+; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 7
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: slli a0, a0, 63
+; CHECK-NEXT: vmulhu.vx v11, v11, a0, v0.t
+; CHECK-NEXT: vadd.vv v10, v11, v10, v0.t
+; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t
+; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 1 x i64> undef, i64 7, i32 0
+ %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 8 x i8> @vpsdiv_by_neg1_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_by_neg1_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vrsub.vi v8, v8, 0, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 8 x i8> undef, i8 -1, i32 0
+ %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 1 x i64> @vpsdiv_by_neg1_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_by_neg1_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vrsub.vi v8, v8, 0, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 1 x i64> undef, i64 -1, i32 0
+ %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 4 x i16> @vpsdiv_by_neg1_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_by_neg1_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vrsub.vi v8, v8, 0, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 4 x i16> undef, i16 -1, i32 0
+ %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 2 x i32> @vpsdiv_by_neg1_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_by_neg1_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vrsub.vi v8, v8, 0, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 2 x i32> undef, i32 -1, i32 0
+ %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 8 x i8> @vpsdiv_by_min_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_by_min_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -128
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vmseq.vx v0, v8, a1, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 8 x i8> undef, i8 -128, i32 0
+ %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 1 x i64> @vpsdiv_by_min_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_by_min_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: slli a1, a1, 63
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vmseq.vx v0, v8, a1, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 1 x i64> undef, i64 -9223372036854775808, i32 0
+ %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 4 x i16> @vpsdiv_by_min_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_by_min_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, 1048568
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmseq.vx v0, v8, a1, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 4 x i16> undef, i16 -32768, i32 0
+ %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 2 x i32> @vpsdiv_by_min_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_by_min_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, 524288
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmseq.vx v0, v8, a1, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 2 x i32> undef, i32 -2147483648, i32 0
+ %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 4 x i16> @vpsdiv_pow2_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_pow2_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 4
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t
+; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t
+; CHECK-NEXT: vmor.mm v10, v10, v11
+; CHECK-NEXT: vsra.vi v11, v8, 15, v0.t
+; CHECK-NEXT: vsrl.vi v11, v11, 14, v0.t
+; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t
+; CHECK-NEXT: vsra.vi v11, v11, 2, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v11, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmsgt.vi v0, v11, 4, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 4 x i16> undef, i16 4, i32 0
+ %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 8 x i8> @vpsdiv_pow2_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_pow2_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 4
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t
+; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t
+; CHECK-NEXT: vmor.mm v10, v10, v11
+; CHECK-NEXT: vsra.vi v11, v8, 7, v0.t
+; CHECK-NEXT: vsrl.vi v11, v11, 6, v0.t
+; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t
+; CHECK-NEXT: vsra.vi v11, v11, 2, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v11, 0
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmsgt.vi v0, v11, 4, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 8 x i8> undef, i8 4, i32 0
+ %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 2 x i32> @vpsdiv_pow2_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_pow2_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 4
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t
+; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t
+; CHECK-NEXT: vmor.mm v10, v10, v11
+; CHECK-NEXT: vsra.vi v11, v8, 31, v0.t
+; CHECK-NEXT: vsrl.vi v11, v11, 30, v0.t
+; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t
+; CHECK-NEXT: vsra.vi v11, v11, 2, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v11, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmsgt.vi v0, v11, 4, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 2 x i32> undef, i32 4, i32 0
+ %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 1 x i64> @vpsdiv_pow2_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_pow2_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 4
+; CHECK-NEXT: li a1, 63
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vsra.vx v11, v8, a1, v0.t
+; CHECK-NEXT: li a1, 62
+; CHECK-NEXT: vsrl.vx v11, v11, a1, v0.t
+; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t
+; CHECK-NEXT: vsra.vi v11, v11, 2, v0.t
+; CHECK-NEXT: vmseq.vi v12, v10, -1, v0.t
+; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t
+; CHECK-NEXT: vmor.mm v0, v10, v12
+; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v11, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vmsgt.vi v0, v11, 4, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 1 x i64> undef, i64 4, i32 0
+ %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 8 x i8> @vpsdiv_const_no_ashr_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_const_no_ashr_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t
+; CHECK-NEXT: li a0, 86
+; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsra.vi v8, v8, 0, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 7, v0.t
+; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 8 x i8> undef, i8 3, i32 0
+ %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 4 x i16> @vpsdiv_const_no_ashr_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_const_no_ashr_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: addiw a0, a0, 1366
+; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsra.vi v8, v8, 0, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 15, v0.t
+; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 4 x i16> undef, i16 3, i32 0
+ %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 2 x i32> @vpsdiv_const_no_ashr_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_const_no_ashr_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t
+; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: addiw a0, a0, 1366
+; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsra.vi v8, v8, 0, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 31, v0.t
+; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 2 x i32> undef, i32 3, i32 0
+ %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 1 x i64> @vpsdiv_const_no_ashr_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_const_no_ashr_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, %hi(.LCPI39_0)
+; CHECK-NEXT: ld a1, %lo(.LCPI39_0)(a1)
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t
+; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsra.vi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 63
+; CHECK-NEXT: vsrl.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 1 x i64> undef, i64 3, i32 0
+ %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 8 x i8> @vpsdiv_const_ashr_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_const_ashr_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t
+; CHECK-NEXT: li a0, 103
+; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 7, v0.t
+; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 8 x i8> undef, i8 5, i32 0
+ %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 4 x i16> @vpsdiv_const_ashr_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_const_ashr_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t
+; CHECK-NEXT: lui a0, 6
+; CHECK-NEXT: addiw a0, a0, 1639
+; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 15, v0.t
+; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 4 x i16> undef, i16 5, i32 0
+ %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 2 x i32> @vpsdiv_const_ashr_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_const_ashr_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t
+; CHECK-NEXT: lui a0, 419430
+; CHECK-NEXT: addiw a0, a0, 1639
+; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 31, v0.t
+; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 2 x i32> undef, i32 5, i32 0
+ %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 1 x i64> @vpsdiv_const_ashr_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_const_ashr_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, %hi(.LCPI43_0)
+; CHECK-NEXT: ld a1, %lo(.LCPI43_0)(a1)
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t
+; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t
+; CHECK-NEXT: li a0, 63
+; CHECK-NEXT: vsrl.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 1 x i64> undef, i64 5, i32 0
+ %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 8 x i8> @vpsdiv_const_add_ashr_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_const_add_ashr_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vmul.vx v9, v8, a1, v0.t
+; CHECK-NEXT: li a0, -109
+; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsra.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 7, v0.t
+; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 8 x i8> undef, i8 7, i32 0
+ %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 4 x i16> @vpsdiv_const_add_ashr_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_const_add_ashr_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmul.vx v9, v8, a1, v0.t
+; CHECK-NEXT: lui a0, 1048569
+; CHECK-NEXT: addiw a0, a0, -1911
+; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsra.vi v8, v8, 3, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 15, v0.t
+; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 4 x i16> undef, i16 15, i32 0
+ %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 2 x i32> @vpsdiv_const_add_ashr_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_const_add_ashr_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmul.vx v9, v8, a1, v0.t
+; CHECK-NEXT: lui a0, 599186
+; CHECK-NEXT: addiw a0, a0, 1171
+; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsra.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 31, v0.t
+; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 2 x i32> undef, i32 7, i32 0
+ %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 1 x i64> @vpsdiv_const_add_ashr_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_const_add_ashr_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, %hi(.LCPI47_0)
+; CHECK-NEXT: ld a1, %lo(.LCPI47_0)(a1)
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: li a0, 1
+; CHECK-NEXT: vmul.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsra.vi v8, v8, 3, v0.t
+; CHECK-NEXT: li a0, 63
+; CHECK-NEXT: vsrl.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 1 x i64> undef, i64 15, i32 0
+ %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 8 x i8> @vpsdiv_const_sub_ashr_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_const_sub_ashr_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vmul.vx v9, v8, a1, v0.t
+; CHECK-NEXT: li a0, 109
+; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsra.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 7, v0.t
+; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 8 x i8> undef, i8 -7, i32 0
+ %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 4 x i16> @vpsdiv_const_sub_ashr_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_const_sub_ashr_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmul.vx v9, v8, a1, v0.t
+; CHECK-NEXT: lui a0, 7
+; CHECK-NEXT: addiw a0, a0, 1911
+; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsra.vi v8, v8, 3, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 15, v0.t
+; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 4 x i16> undef, i16 -15, i32 0
+ %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 2 x i32> @vpsdiv_const_sub_ashr_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_const_sub_ashr_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmul.vx v9, v8, a1, v0.t
+; CHECK-NEXT: lui a0, 449390
+; CHECK-NEXT: addiw a0, a0, -1171
+; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsra.vi v8, v8, 2, v0.t
+; CHECK-NEXT: vsrl.vi v9, v8, 31, v0.t
+; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 2 x i32> undef, i32 -7, i32 0
+ %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 1 x i64> @vpsdiv_const_sub_ashr_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_const_sub_ashr_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, %hi(.LCPI51_0)
+; CHECK-NEXT: ld a1, %lo(.LCPI51_0)(a1)
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: vmul.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t
+; CHECK-NEXT: li a0, 63
+; CHECK-NEXT: vsrl.vx v9, v8, a0, v0.t
+; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 1 x i64> undef, i64 -3, i32 0
+ %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vpurem_by_max_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpurem_by_max_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vmseq.vi v0, v8, -1, v0.t
+; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 1 x i64> undef, i64 18446744073709551615, i32 0
+ %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 4 x i16> @vpurem_by_max_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpurem_by_max_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmseq.vi v0, v8, -1, v0.t
+; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 4 x i16> undef, i16 65535, i32 0
+ %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.urem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 8 x i8> @vpurem_by_max_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpurem_by_max_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vmseq.vi v0, v8, -1, v0.t
+; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 8 x i8> undef, i8 255, i32 0
+ %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.urem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 2 x i32> @vpurem_by_max_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpurem_by_max_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmseq.vi v0, v8, -1, v0.t
+; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 2 x i32> undef, i32 4294967295, i32 0
+ %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.urem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 1 x i64> @vpurem_by_const_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpurem_by_const_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: lui a1, %hi(.LCPI56_0)
+; CHECK-NEXT: ld a1, %lo(.LCPI56_0)(a1)
+; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 5
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vmulhu.vx v11, v8, a1, v0.t
+; CHECK-NEXT: vsrl.vi v11, v11, 2, v0.t
+; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v10, v11, v8, v0
+; CHECK-NEXT: li a0, 5
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmul.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 1 x i64> undef, i64 5, i32 0
+ %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 4 x i16> @vpurem_by_const_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpurem_by_const_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 5
+; CHECK-NEXT: lui a1, 1048573
+; CHECK-NEXT: addiw a1, a1, -819
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmulhu.vx v11, v8, a1, v0.t
+; CHECK-NEXT: vsrl.vi v11, v11, 2, v0.t
+; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v10, v11, v8, v0
+; CHECK-NEXT: li a0, 5
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmul.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 4 x i16> undef, i16 5, i32 0
+ %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.urem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 8 x i8> @vpurem_by_const_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpurem_by_const_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 5
+; CHECK-NEXT: li a1, -51
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vmulhu.vx v11, v8, a1, v0.t
+; CHECK-NEXT: vsrl.vi v11, v11, 2, v0.t
+; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v10, v11, v8, v0
+; CHECK-NEXT: li a0, 5
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmul.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 8 x i8> undef, i8 5, i32 0
+ %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.urem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 2 x i32> @vpurem_by_const_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpurem_by_const_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 5
+; CHECK-NEXT: lui a1, 838861
+; CHECK-NEXT: addiw a1, a1, -819
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmulhu.vx v11, v8, a1, v0.t
+; CHECK-NEXT: vsrl.vi v11, v11, 2, v0.t
+; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v10, v11, v8, v0
+; CHECK-NEXT: li a0, 5
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmul.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 2 x i32> undef, i32 5, i32 0
+ %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.urem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 1 x i64> @vpsrem_by_const_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsrem_by_const_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, %hi(.LCPI60_0)
+; CHECK-NEXT: ld a1, %lo(.LCPI60_0)(a1)
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t
+; CHECK-NEXT: vmulh.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vadd.vv v9, v10, v9, v0.t
+; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t
+; CHECK-NEXT: li a0, 63
+; CHECK-NEXT: vsrl.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vand.vi v10, v10, -1, v0.t
+; CHECK-NEXT: vadd.vv v9, v9, v10, v0.t
+; CHECK-NEXT: li a0, 5
+; CHECK-NEXT: vmul.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 1 x i64> undef, i64 5, i32 0
+ %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 4 x i16> @vpsrem_by_const_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsrem_by_const_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t
+; CHECK-NEXT: lui a0, 6
+; CHECK-NEXT: addiw a0, a0, 1639
+; CHECK-NEXT: vmulh.vx v10, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v9, v10, v9, v0.t
+; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t
+; CHECK-NEXT: vsrl.vi v10, v9, 15, v0.t
+; CHECK-NEXT: vand.vi v10, v10, -1, v0.t
+; CHECK-NEXT: vadd.vv v9, v9, v10, v0.t
+; CHECK-NEXT: li a0, 5
+; CHECK-NEXT: vmul.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 4 x i16> undef, i16 5, i32 0
+ %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.srem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 8 x i8> @vpsrem_by_const_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsrem_by_const_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t
+; CHECK-NEXT: li a0, 103
+; CHECK-NEXT: vmulh.vx v10, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v9, v10, v9, v0.t
+; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t
+; CHECK-NEXT: vsrl.vi v10, v9, 7, v0.t
+; CHECK-NEXT: vand.vi v10, v10, -1, v0.t
+; CHECK-NEXT: vadd.vv v9, v9, v10, v0.t
+; CHECK-NEXT: li a0, 5
+; CHECK-NEXT: vmul.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 8 x i8> undef, i8 5, i32 0
+ %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.srem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 2 x i32> @vpsrem_by_const_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsrem_by_const_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t
+; CHECK-NEXT: lui a0, 419430
+; CHECK-NEXT: addiw a0, a0, 1639
+; CHECK-NEXT: vmulh.vx v10, v8, a0, v0.t
+; CHECK-NEXT: vadd.vv v9, v10, v9, v0.t
+; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t
+; CHECK-NEXT: vsrl.vi v10, v9, 31, v0.t
+; CHECK-NEXT: vand.vi v10, v10, -1, v0.t
+; CHECK-NEXT: vadd.vv v9, v9, v10, v0.t
+; CHECK-NEXT: li a0, 5
+; CHECK-NEXT: vmul.vx v9, v9, a0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 2 x i32> undef, i32 5, i32 0
+ %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.srem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 8 x i8> @vpudiv_by_1_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudiv_by_1_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 8 x i8> undef, i8 1, i32 0
+ %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i8> @llvm.vp.udiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 4 x i16> @vpudiv_by_1_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudiv_by_1_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 4 x i16> undef, i16 1, i32 0
+ %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i16> @llvm.vp.udiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 2 x i32> @vpudiv_by_1_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudiv_by_1_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 2 x i32> undef, i32 1, i32 0
+ %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i32> @llvm.vp.udiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 1 x i64> @vpudiv_by_1_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudiv_by_1_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 1 x i64> undef, i64 1, i32 0
+ %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 8 x i8> @vpsdiv_by_1_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_by_1_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 8 x i8> undef, i8 1, i32 0
+ %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 4 x i16> @vpsdiv_by_1_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_by_1_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 4 x i16> undef, i16 1, i32 0
+ %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 2 x i32> @vpsdiv_by_1_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_by_1_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 2 x i32> undef, i32 1, i32 0
+ %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 1 x i64> @vpsdiv_by_1_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_by_1_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 1 x i64> undef, i64 1, i32 0
+ %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 8 x i8> @vpurem_by_1_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpurem_by_1_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 8 x i8> undef, i8 1, i32 0
+ %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.urem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 4 x i16> @vpurem_by_1_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpurem_by_1_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 4 x i16> undef, i16 1, i32 0
+ %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.urem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 2 x i32> @vpurem_by_1_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpurem_by_1_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 2 x i32> undef, i32 1, i32 0
+ %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.urem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 1 x i64> @vpurem_by_1_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpurem_by_1_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 1 x i64> undef, i64 1, i32 0
+ %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 8 x i8> @vpsrem_by_1_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsrem_by_1_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 8 x i8> undef, i8 1, i32 0
+ %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.srem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 4 x i16> @vpsrem_by_1_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsrem_by_1_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 4 x i16> undef, i16 1, i32 0
+ %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.srem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 2 x i32> @vpsrem_by_1_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsrem_by_1_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 2 x i32> undef, i32 1, i32 0
+ %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.srem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 1 x i64> @vpsrem_by_1_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsrem_by_1_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 1 x i64> undef, i64 1, i32 0
+ %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 8 x i8> @vpsrem_by_neg1_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsrem_by_neg1_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 8 x i8> undef, i8 -1, i32 0
+ %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.srem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 4 x i16> @vpsrem_by_neg1_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsrem_by_neg1_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 4 x i16> undef, i16 -1, i32 0
+ %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.srem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 2 x i32> @vpsrem_by_neg1_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsrem_by_neg1_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 2 x i32> undef, i32 -1, i32 0
+ %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.srem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 1 x i64> @vpsrem_by_neg1_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsrem_by_neg1_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 1 x i64> undef, i64 -1, i32 0
+ %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 8 x i8> @vpsdivrem_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdivrem_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 109
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vsub.vv v9, v9, v8, v0.t
+; CHECK-NEXT: vsra.vi v9, v9, 2, v0.t
+; CHECK-NEXT: vsrl.vi v10, v9, 7, v0.t
+; CHECK-NEXT: vand.vi v10, v10, -1, v0.t
+; CHECK-NEXT: vadd.vv v9, v9, v10, v0.t
+; CHECK-NEXT: vmul.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i8> @llvm.vp.srem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 -7), <vscale x 8 x i1> %m, i32 %evl)
+ %w = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 -7), <vscale x 8 x i1> %m, i32 %evl)
+ %x = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %v, <vscale x 8 x i8> %w, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %x
+}
+
+define <vscale x 8 x i8> @vpudivrem_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudivrem_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 37
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vmulhu.vx v9, v8, a1, v0.t
+; CHECK-NEXT: li a0, -128
+; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t
+; CHECK-NEXT: vmulhu.vx v10, v10, a0, v0.t
+; CHECK-NEXT: li a0, 7
+; CHECK-NEXT: vadd.vv v9, v10, v9, v0.t
+; CHECK-NEXT: vsrl.vi v9, v9, 2, v0.t
+; CHECK-NEXT: vmul.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i8> @llvm.vp.urem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 7), <vscale x 8 x i1> %m, i32 %evl)
+ %w = call <vscale x 8 x i8> @llvm.vp.udiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 7), <vscale x 8 x i1> %m, i32 %evl)
+ %x = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %v, <vscale x 8 x i8> %w, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %x
+}
>From b6137759d0766af3b838e3390d7e9ab1204cc3aa Mon Sep 17 00:00:00 2001
From: Jesse Huang <jesse.huang at sifive.com>
Date: Wed, 10 Jan 2024 04:24:20 -0800
Subject: [PATCH 03/10] [RISCV] Set VP_MULH* to Expand on Zve64* and Optimize
BuildVP*DIV
* Set VP_MULHU/VP_MULHS with i64 element vectors to Expand on Zve64*
* Hoist the isOperationLegalOrCustom check to the top of BuildVPSDIV/BuildVPUDIV
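A minimal sketch (an assumption for illustration, not the literal patch; the
subtarget predicate name in particular is assumed) of how the target can mark
these nodes Expand so that the VP divide-by-constant builders bail out early
on Zve64*:

  // In RISCVTargetLowering's constructor: Zve64* has no full 64x64
  // multiply, so VP_MULHU/VP_MULHS on i64 element vectors are expanded.
  // With the hoisted isOperationLegalOrCustom check, BuildVPSDIV and
  // BuildVPUDIV then return SDValue() and plain vdiv/vdivu is emitted.
  if (!Subtarget.hasVInstructionsFullMultiply()) // assumed predicate
    for (MVT VT : {MVT::nxv1i64, MVT::nxv2i64, MVT::nxv4i64, MVT::nxv8i64})
      setOperationAction({ISD::VP_MULHU, ISD::VP_MULHS}, VT, Expand);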
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 14 +--
.../CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll | 113 ++++++++++++++++++
2 files changed, 119 insertions(+), 8 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 82a2500ff386d..e2b74408885c0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6507,7 +6507,8 @@ SDValue TargetLowering::BuildVPSDIV(SDNode *N, SelectionDAG &DAG,
unsigned EltBits = VT.getScalarSizeInBits();
// Check to see if we can do this.
- if (!isTypeLegal(VT))
+ if (!isTypeLegal(VT) ||
+ !isOperationLegalOrCustom(ISD::VP_MULHS, VT, IsAfterLegalization))
return SDValue();
SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
@@ -6577,9 +6578,7 @@ SDValue TargetLowering::BuildVPSDIV(SDNode *N, SelectionDAG &DAG,
// Multiply the numerator (operand 0) by the magic value.
auto GetMULHS = [&](SDValue X, SDValue Y) {
- if (isOperationLegalOrCustom(ISD::VP_MULHS, VT, IsAfterLegalization))
- return DAG.getNode(ISD::VP_MULHS, DL, VT, X, Y, Mask, VL);
- return SDValue();
+ return DAG.getNode(ISD::VP_MULHS, DL, VT, X, Y, Mask, VL);
};
SDValue Q = GetMULHS(N0, MagicFactor);
@@ -6822,7 +6821,8 @@ SDValue TargetLowering::BuildVPUDIV(SDNode *N, SelectionDAG &DAG,
unsigned EltBits = VT.getScalarSizeInBits();
// Check to see if we can do this.
- if (!isTypeLegal(VT))
+ if (!isTypeLegal(VT) ||
+ !isOperationLegalOrCustom(ISD::VP_MULHU, VT, IsAfterLegalization))
return SDValue();
bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
@@ -6908,9 +6908,7 @@ SDValue TargetLowering::BuildVPUDIV(SDNode *N, SelectionDAG &DAG,
}
auto GetMULHU = [&](SDValue X, SDValue Y) {
- if (isOperationLegalOrCustom(ISD::VP_MULHU, VT, IsAfterLegalization))
- return DAG.getNode(ISD::VP_MULHU, DL, VT, X, Y, Mask, VL);
- return SDValue();
+ return DAG.getNode(ISD::VP_MULHU, DL, VT, X, Y, Mask, VL);
};
// Multiply the numerator (operand 0) by the magic value.
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll
new file mode 100644
index 0000000000000..2fa4abb642270
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll
@@ -0,0 +1,113 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs | FileCheck %s
+
+declare <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.shl.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+
+
+define <vscale x 1 x i64> @vpudiv_by_const_no_add_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudiv_by_const_no_add_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 5
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vdivu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 1 x i64> undef, i64 5, i32 0
+ %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vpudiv_by_const_with_add_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudiv_by_const_with_add_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 7
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vdivu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 1 x i64> undef, i64 7, i32 0
+ %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vpsdiv_const_no_ashr_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_const_no_ashr_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 3
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vdiv.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 1 x i64> undef, i64 3, i32 0
+ %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vpsdiv_const_ashr_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_const_ashr_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 5
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vdiv.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 1 x i64> undef, i64 5, i32 0
+ %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vpsdiv_const_add_ashr_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_const_add_ashr_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 15
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vdiv.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 1 x i64> undef, i64 15, i32 0
+ %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vpsdiv_const_sub_ashr_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_const_sub_ashr_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -3
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vdiv.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 1 x i64> undef, i64 -3, i32 0
+ %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vpurem_by_const_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpurem_by_const_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 5
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vremu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 1 x i64> undef, i64 5, i32 0
+ %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vpsrem_by_const_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsrem_by_const_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 5
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vrem.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %vec = insertelement <vscale x 1 x i64> undef, i64 5, i32 0
+ %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
>From 81032b1a3891dc5c9612ad5abb7e1fc8bb1f6118 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Mon, 13 Feb 2023 08:39:03 -0800
Subject: [PATCH 04/10] [TargetLowering] Optimize 'factor' code in BuildVPSDIV.
We can't constant fold VP_MUL yet, nor combine (VP_SUB 0, X) with
VP_ADD into a single VP_SUB, so add flags that track when we actually
need to emit VP_MUL/VP_ADD/VP_SUB.
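
For reviewers unfamiliar with the lowering: the scalar recipe being
vectorized here is the classic magic-number signed division (Hacker's
Delight style), and the new flags only decide which form of the
optional "add/subtract the numerator" step gets emitted. Below is a
minimal standalone C++ sketch of that recipe; it is an illustration
only, not LLVM code, and the function name and Factor parameter are
invented for exposition.

#include <cassert>
#include <cstdint>

// Scalar model of sdiv-by-constant via a magic multiply. Factor is +1
// when d > 0 and the magic constant is negative, -1 when d < 0 and the
// magic constant is positive, and 0 otherwise -- the three cases the
// AnyFactorOne/AnyFactorNegOne flags distinguish so that a plain
// VP_ADD or VP_SUB can be emitted instead of a VP_MUL by 0/1/-1.
static int32_t SDivByConst(int32_t N, int32_t Magic, int Factor,
                           unsigned Shift) {
  // High half of the widening multiply: the VP_MULHS step.
  int32_t Q = (int32_t)(((int64_t)N * Magic) >> 32);
  if (Factor == 1)
    Q += N; // the VP_ADD path
  else if (Factor == -1)
    Q -= N; // the VP_SUB path
  // Arithmetic shift right (well-defined for negatives since C++20),
  // then add the sign bit to round the quotient toward zero.
  Q >>= Shift;
  Q += (uint32_t)Q >> 31;
  return Q;
}

int main() {
  // Divide by 7: Magic = 0x92492493 is negative, so Factor = +1, Shift = 2.
  for (int32_t N : {-100, -7, -1, 0, 1, 6, 7, 100})
    assert(SDivByConst(N, (int32_t)0x92492493, /*Factor=*/1, 2) == N / 7);
  return 0;
}

When both flags end up set (some lanes need +1 and others -1), the
per-lane factor still has to be materialized with a VP_MUL, which is
why that branch keeps the old sequence.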
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 25 ++-
llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll | 142 +++++++-----------
2 files changed, 72 insertions(+), 95 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index e2b74408885c0..b7846212a94ab 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6511,6 +6511,9 @@ SDValue TargetLowering::BuildVPSDIV(SDNode *N, SelectionDAG &DAG,
!isOperationLegalOrCustom(ISD::VP_MULHS, VT, IsAfterLegalization))
return SDValue();
+ bool AnyFactorOne = false;
+ bool AnyFactorNegOne = false;
+
SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
auto BuildSDIVPattern = [&](ConstantSDNode *C) {
@@ -6529,12 +6532,16 @@ SDValue TargetLowering::BuildVPSDIV(SDNode *N, SelectionDAG &DAG,
magics.Magic = 0;
magics.ShiftAmount = 0;
ShiftMask = 0;
+ AnyFactorOne |= Divisor.isOne();
+ AnyFactorNegOne |= Divisor.isAllOnes();
} else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
// If d > 0 and m < 0, add the numerator.
NumeratorFactor = 1;
+ AnyFactorOne = true;
} else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
// If d < 0 and m > 0, subtract the numerator.
NumeratorFactor = -1;
+ AnyFactorNegOne = true;
}
MagicFactors.push_back(DAG.getConstant(magics.Magic, DL, SVT));
@@ -6588,10 +6595,20 @@ SDValue TargetLowering::BuildVPSDIV(SDNode *N, SelectionDAG &DAG,
Created.push_back(Q.getNode());
// (Optionally) Add/subtract the numerator using Factor.
- Factor = DAG.getNode(ISD::VP_MUL, DL, VT, N0, Factor, Mask, VL);
- Created.push_back(Factor.getNode());
- Q = DAG.getNode(ISD::VP_ADD, DL, VT, Q, Factor, Mask, VL);
- Created.push_back(Q.getNode());
+ // FIXME: The AnyFactorOne/NegOne flags are a hack around lack of constant
+ // folding for VP_MUL/ADD.
+ if (AnyFactorOne && AnyFactorNegOne) {
+ Factor = DAG.getNode(ISD::VP_MUL, DL, VT, N0, Factor, Mask, VL);
+ Created.push_back(Factor.getNode());
+ Q = DAG.getNode(ISD::VP_ADD, DL, VT, Q, Factor, Mask, VL);
+ Created.push_back(Q.getNode());
+ } else if (AnyFactorOne) {
+ Q = DAG.getNode(ISD::VP_ADD, DL, VT, Q, N0, Mask, VL);
+ Created.push_back(Q.getNode());
+ } else if (AnyFactorNegOne) {
+ Q = DAG.getNode(ISD::VP_SUB, DL, VT, Q, N0, Mask, VL);
+ Created.push_back(Q.getNode());
+ }
// Shift right algebraic by shift value.
Q = DAG.getNode(ISD::VP_SRA, DL, VT, Q, Shift, Mask, VL);
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll
index f78a0ec7f2378..6e417e4dd7995 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll
@@ -707,11 +707,9 @@ define <vscale x 1 x i64> @vpsdiv_pow2_nxv1i64(<vscale x 1 x i64> %va, <vscale x
define <vscale x 8 x i8> @vpsdiv_const_no_ashr_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsdiv_const_no_ashr_nxv8i8:
; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 86
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t
-; CHECK-NEXT: li a0, 86
-; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsra.vi v8, v8, 0, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 7, v0.t
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
@@ -726,12 +724,10 @@ define <vscale x 8 x i8> @vpsdiv_const_no_ashr_nxv8i8(<vscale x 8 x i8> %va, <vs
define <vscale x 4 x i16> @vpsdiv_const_no_ashr_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsdiv_const_no_ashr_nxv4i16:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, 5
+; CHECK-NEXT: addiw a1, a1, 1366
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t
-; CHECK-NEXT: lui a0, 5
-; CHECK-NEXT: addiw a0, a0, 1366
-; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsra.vi v8, v8, 0, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 15, v0.t
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
@@ -746,12 +742,10 @@ define <vscale x 4 x i16> @vpsdiv_const_no_ashr_nxv4i16(<vscale x 4 x i16> %va,
define <vscale x 2 x i32> @vpsdiv_const_no_ashr_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsdiv_const_no_ashr_nxv2i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, 349525
+; CHECK-NEXT: addiw a1, a1, 1366
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t
-; CHECK-NEXT: lui a0, 349525
-; CHECK-NEXT: addiw a0, a0, 1366
-; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsra.vi v8, v8, 0, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 31, v0.t
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
@@ -769,9 +763,7 @@ define <vscale x 1 x i64> @vpsdiv_const_no_ashr_nxv1i64(<vscale x 1 x i64> %va,
; CHECK-NEXT: lui a1, %hi(.LCPI39_0)
; CHECK-NEXT: ld a1, %lo(.LCPI39_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t
; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsra.vi v8, v8, 0, v0.t
; CHECK-NEXT: li a0, 63
; CHECK-NEXT: vsrl.vx v9, v8, a0, v0.t
@@ -787,11 +779,9 @@ define <vscale x 1 x i64> @vpsdiv_const_no_ashr_nxv1i64(<vscale x 1 x i64> %va,
define <vscale x 8 x i8> @vpsdiv_const_ashr_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsdiv_const_ashr_nxv8i8:
; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 103
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t
-; CHECK-NEXT: li a0, 103
-; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 7, v0.t
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
@@ -806,12 +796,10 @@ define <vscale x 8 x i8> @vpsdiv_const_ashr_nxv8i8(<vscale x 8 x i8> %va, <vscal
define <vscale x 4 x i16> @vpsdiv_const_ashr_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsdiv_const_ashr_nxv4i16:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, 6
+; CHECK-NEXT: addiw a1, a1, 1639
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t
-; CHECK-NEXT: lui a0, 6
-; CHECK-NEXT: addiw a0, a0, 1639
-; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 15, v0.t
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
@@ -826,12 +814,10 @@ define <vscale x 4 x i16> @vpsdiv_const_ashr_nxv4i16(<vscale x 4 x i16> %va, <vs
define <vscale x 2 x i32> @vpsdiv_const_ashr_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsdiv_const_ashr_nxv2i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, 419430
+; CHECK-NEXT: addiw a1, a1, 1639
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t
-; CHECK-NEXT: lui a0, 419430
-; CHECK-NEXT: addiw a0, a0, 1639
-; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 31, v0.t
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
@@ -849,9 +835,7 @@ define <vscale x 1 x i64> @vpsdiv_const_ashr_nxv1i64(<vscale x 1 x i64> %va, <vs
; CHECK-NEXT: lui a1, %hi(.LCPI43_0)
; CHECK-NEXT: ld a1, %lo(.LCPI43_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t
; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t
; CHECK-NEXT: li a0, 63
; CHECK-NEXT: vsrl.vx v9, v8, a0, v0.t
@@ -867,12 +851,10 @@ define <vscale x 1 x i64> @vpsdiv_const_ashr_nxv1i64(<vscale x 1 x i64> %va, <vs
define <vscale x 8 x i8> @vpsdiv_const_add_ashr_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsdiv_const_add_ashr_nxv8i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: li a1, -109
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vmul.vx v9, v8, a1, v0.t
-; CHECK-NEXT: li a0, -109
-; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsra.vi v8, v8, 2, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 7, v0.t
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
@@ -887,13 +869,11 @@ define <vscale x 8 x i8> @vpsdiv_const_add_ashr_nxv8i8(<vscale x 8 x i8> %va, <v
define <vscale x 4 x i16> @vpsdiv_const_add_ashr_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsdiv_const_add_ashr_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: lui a1, 1048569
+; CHECK-NEXT: addiw a1, a1, -1911
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vmul.vx v9, v8, a1, v0.t
-; CHECK-NEXT: lui a0, 1048569
-; CHECK-NEXT: addiw a0, a0, -1911
-; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsra.vi v8, v8, 3, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 15, v0.t
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
@@ -908,13 +888,11 @@ define <vscale x 4 x i16> @vpsdiv_const_add_ashr_nxv4i16(<vscale x 4 x i16> %va,
define <vscale x 2 x i32> @vpsdiv_const_add_ashr_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsdiv_const_add_ashr_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: lui a1, 599186
+; CHECK-NEXT: addiw a1, a1, 1171
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vmul.vx v9, v8, a1, v0.t
-; CHECK-NEXT: lui a0, 599186
-; CHECK-NEXT: addiw a0, a0, 1171
-; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsra.vi v8, v8, 2, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 31, v0.t
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
@@ -932,10 +910,8 @@ define <vscale x 1 x i64> @vpsdiv_const_add_ashr_nxv1i64(<vscale x 1 x i64> %va,
; CHECK-NEXT: lui a1, %hi(.LCPI47_0)
; CHECK-NEXT: ld a1, %lo(.LCPI47_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: li a0, 1
-; CHECK-NEXT: vmul.vx v9, v8, a0, v0.t
-; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsra.vi v8, v8, 3, v0.t
; CHECK-NEXT: li a0, 63
; CHECK-NEXT: vsrl.vx v9, v8, a0, v0.t
@@ -951,12 +927,10 @@ define <vscale x 1 x i64> @vpsdiv_const_add_ashr_nxv1i64(<vscale x 1 x i64> %va,
define <vscale x 8 x i8> @vpsdiv_const_sub_ashr_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsdiv_const_sub_ashr_nxv8i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: li a1, 109
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vmul.vx v9, v8, a1, v0.t
-; CHECK-NEXT: li a0, 109
-; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vsub.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsra.vi v8, v8, 2, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 7, v0.t
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
@@ -971,13 +945,11 @@ define <vscale x 8 x i8> @vpsdiv_const_sub_ashr_nxv8i8(<vscale x 8 x i8> %va, <v
define <vscale x 4 x i16> @vpsdiv_const_sub_ashr_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsdiv_const_sub_ashr_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: lui a1, 7
+; CHECK-NEXT: addiw a1, a1, 1911
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vmul.vx v9, v8, a1, v0.t
-; CHECK-NEXT: lui a0, 7
-; CHECK-NEXT: addiw a0, a0, 1911
-; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vsub.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsra.vi v8, v8, 3, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 15, v0.t
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
@@ -992,13 +964,11 @@ define <vscale x 4 x i16> @vpsdiv_const_sub_ashr_nxv4i16(<vscale x 4 x i16> %va,
define <vscale x 2 x i32> @vpsdiv_const_sub_ashr_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsdiv_const_sub_ashr_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: lui a1, 449390
+; CHECK-NEXT: addiw a1, a1, -1171
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vmul.vx v9, v8, a1, v0.t
-; CHECK-NEXT: lui a0, 449390
-; CHECK-NEXT: addiw a0, a0, -1171
-; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vsub.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsra.vi v8, v8, 2, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 31, v0.t
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
@@ -1016,10 +986,8 @@ define <vscale x 1 x i64> @vpsdiv_const_sub_ashr_nxv1i64(<vscale x 1 x i64> %va,
; CHECK-NEXT: lui a1, %hi(.LCPI51_0)
; CHECK-NEXT: ld a1, %lo(.LCPI51_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: li a0, -1
-; CHECK-NEXT: vmul.vx v9, v8, a0, v0.t
-; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vsub.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t
; CHECK-NEXT: li a0, 63
; CHECK-NEXT: vsrl.vx v9, v8, a0, v0.t
@@ -1185,9 +1153,7 @@ define <vscale x 1 x i64> @vpsrem_by_const_nxv1i64(<vscale x 1 x i64> %va, <vsca
; CHECK-NEXT: lui a1, %hi(.LCPI60_0)
; CHECK-NEXT: ld a1, %lo(.LCPI60_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t
-; CHECK-NEXT: vmulh.vx v10, v8, a1, v0.t
-; CHECK-NEXT: vadd.vv v9, v10, v9, v0.t
+; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t
; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t
; CHECK-NEXT: li a0, 63
; CHECK-NEXT: vsrl.vx v10, v9, a0, v0.t
@@ -1206,12 +1172,10 @@ define <vscale x 1 x i64> @vpsrem_by_const_nxv1i64(<vscale x 1 x i64> %va, <vsca
define <vscale x 4 x i16> @vpsrem_by_const_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsrem_by_const_nxv4i16:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, 6
+; CHECK-NEXT: addiw a1, a1, 1639
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t
-; CHECK-NEXT: lui a0, 6
-; CHECK-NEXT: addiw a0, a0, 1639
-; CHECK-NEXT: vmulh.vx v10, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v9, v10, v9, v0.t
+; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t
; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t
; CHECK-NEXT: vsrl.vi v10, v9, 15, v0.t
; CHECK-NEXT: vand.vi v10, v10, -1, v0.t
@@ -1229,11 +1193,9 @@ define <vscale x 4 x i16> @vpsrem_by_const_nxv4i16(<vscale x 4 x i16> %va, <vsca
define <vscale x 8 x i8> @vpsrem_by_const_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsrem_by_const_nxv8i8:
; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 103
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t
-; CHECK-NEXT: li a0, 103
-; CHECK-NEXT: vmulh.vx v10, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v9, v10, v9, v0.t
+; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t
; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t
; CHECK-NEXT: vsrl.vi v10, v9, 7, v0.t
; CHECK-NEXT: vand.vi v10, v10, -1, v0.t
@@ -1251,12 +1213,10 @@ define <vscale x 8 x i8> @vpsrem_by_const_nxv8i8(<vscale x 8 x i8> %va, <vscale
define <vscale x 2 x i32> @vpsrem_by_const_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsrem_by_const_nxv2i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, 419430
+; CHECK-NEXT: addiw a1, a1, 1639
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t
-; CHECK-NEXT: lui a0, 419430
-; CHECK-NEXT: addiw a0, a0, 1639
-; CHECK-NEXT: vmulh.vx v10, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v9, v10, v9, v0.t
+; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t
; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t
; CHECK-NEXT: vsrl.vi v10, v9, 31, v0.t
; CHECK-NEXT: vand.vi v10, v10, -1, v0.t
>From b833489b707156896d7fc28d97c2707aeba59230 Mon Sep 17 00:00:00 2001
From: Jesse Huang <jesse.huang at sifive.com>
Date: Thu, 6 Feb 2025 02:38:26 +0800
Subject: [PATCH 05/10] Update tests
---
llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll | 613 +++++++++++++-----
1 file changed, 434 insertions(+), 179 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll
index 6e417e4dd7995..b39fc392482cf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll
@@ -28,9 +28,7 @@ define <vscale x 8 x i8> @vpudiv_by_max_nxv8i8(<vscale x 8 x i8> %va, <vscale x
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT: vmseq.vi v0, v8, -1, v0.t
-; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: ret
%vec = insertelement <vscale x 8 x i8> undef, i8 255, i32 0
@@ -44,9 +42,7 @@ define <vscale x 4 x i16> @vpudiv_by_max_nxv4i16(<vscale x 4 x i16> %va, <vscale
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vmseq.vi v0, v8, -1, v0.t
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: ret
%vec = insertelement <vscale x 4 x i16> undef, i16 65535, i32 0
@@ -60,9 +56,7 @@ define <vscale x 2 x i32> @vpudiv_by_max_nxv2i32(<vscale x 2 x i32> %va, <vscale
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vmseq.vi v0, v8, -1, v0.t
-; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: ret
%vec = insertelement <vscale x 2 x i32> undef, i32 4294967295, i32 0
@@ -76,9 +70,7 @@ define <vscale x 1 x i64> @vpudiv_by_max_nxv1i64(<vscale x 1 x i64> %va, <vscale
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vmseq.vi v0, v8, -1, v0.t
-; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: ret
%vec = insertelement <vscale x 1 x i64> undef, i64 18446744073709551615, i32 0
@@ -92,8 +84,7 @@ define <vscale x 8 x i8> @fold_vpudiv_vpurem_nxv8i8(<vscale x 8 x i8> %va, <vsca
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT: vsrl.vi v9, v8, 7, v0.t
-; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vmul.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vsll.vi v10, v9, 7, v0.t
; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vadd.vv v8, v9, v8
@@ -111,8 +102,7 @@ define <vscale x 4 x i16> @fold_vpudiv_vpurem_nxv4i16(<vscale x 4 x i16> %va, <v
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vsrl.vi v9, v8, 14, v0.t
-; CHECK-NEXT: lui a0, 4
-; CHECK-NEXT: vmul.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vsll.vi v10, v9, 14, v0.t
; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vadd.vv v8, v9, v8
@@ -130,8 +120,7 @@ define <vscale x 2 x i32> @fold_vpudiv_vpurem_nxv2i32(<vscale x 2 x i32> %va, <v
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vsrl.vi v9, v8, 14, v0.t
-; CHECK-NEXT: lui a0, 4
-; CHECK-NEXT: vmul.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vsll.vi v10, v9, 14, v0.t
; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vadd.vv v8, v9, v8
@@ -149,8 +138,7 @@ define <vscale x 1 x i64> @fold_vpudiv_vpurem_nxv1i64(<vscale x 1 x i64> %va, <v
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vsrl.vi v9, v8, 14, v0.t
-; CHECK-NEXT: lui a0, 4
-; CHECK-NEXT: vmul.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vsll.vi v10, v9, 14, v0.t
; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vadd.vv v8, v9, v8
@@ -197,8 +185,6 @@ define <vscale x 4 x i16> @vpudiv_by_shl2_nxv4i16(<vscale x 4 x i16> %va, i16 %b
define <vscale x 2 x i32> @vpudiv_by_shl2_nxv2i32(<vscale x 2 x i32> %va, i32 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpudiv_by_shl2_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: slli a0, a0, 32
-; CHECK-NEXT: srli a0, a0, 32
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
@@ -227,9 +213,8 @@ define <vscale x 1 x i64> @vpudiv_by_shl2_nxv1i64(<vscale x 1 x i64> %va, i64 %b
define <vscale x 8 x i8> @vpudiv_by_vpshl2_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpudiv_by_vpshl2_nxv8i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
; CHECK-NEXT: vadd.vi v9, v9, 2, v0.t
; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
@@ -245,9 +230,8 @@ define <vscale x 8 x i8> @vpudiv_by_vpshl2_nxv8i8(<vscale x 8 x i8> %va, i8 %b,
define <vscale x 4 x i16> @vpudiv_by_vpshl2_nxv4i16(<vscale x 4 x i16> %va, i16 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpudiv_by_vpshl2_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
; CHECK-NEXT: vadd.vi v9, v9, 2, v0.t
; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
@@ -263,9 +247,8 @@ define <vscale x 4 x i16> @vpudiv_by_vpshl2_nxv4i16(<vscale x 4 x i16> %va, i16
define <vscale x 2 x i32> @vpudiv_by_vpshl2_nxv2i32(<vscale x 2 x i32> %va, i32 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpudiv_by_vpshl2_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
; CHECK-NEXT: vadd.vi v9, v9, 2, v0.t
; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
@@ -281,9 +264,8 @@ define <vscale x 2 x i32> @vpudiv_by_vpshl2_nxv2i32(<vscale x 2 x i32> %va, i32
define <vscale x 1 x i64> @vpudiv_by_vpshl2_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpudiv_by_vpshl2_nxv1i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
; CHECK-NEXT: vadd.vi v9, v9, 2, v0.t
; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
@@ -299,11 +281,10 @@ define <vscale x 1 x i64> @vpudiv_by_vpshl2_nxv1i64(<vscale x 1 x i64> %va, i64
define <vscale x 8 x i8> @vpudiv_by_const_no_add_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpudiv_by_const_no_add_nxv8i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, 5
-; CHECK-NEXT: li a1, -51
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vmv.v.i v9, 5
+; CHECK-NEXT: li a0, -51
+; CHECK-NEXT: vmulhu.vx v10, v8, a0, v0.t
; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t
; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
@@ -317,12 +298,11 @@ define <vscale x 8 x i8> @vpudiv_by_const_no_add_nxv8i8(<vscale x 8 x i8> %va, <
define <vscale x 4 x i16> @vpudiv_by_const_no_add_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpudiv_by_const_no_add_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, 5
-; CHECK-NEXT: lui a1, 1048573
-; CHECK-NEXT: addiw a1, a1, -819
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vmv.v.i v9, 5
+; CHECK-NEXT: lui a0, 1048573
+; CHECK-NEXT: addi a0, a0, -819
+; CHECK-NEXT: vmulhu.vx v10, v8, a0, v0.t
; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t
; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
@@ -336,12 +316,11 @@ define <vscale x 4 x i16> @vpudiv_by_const_no_add_nxv4i16(<vscale x 4 x i16> %va
define <vscale x 2 x i32> @vpudiv_by_const_no_add_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpudiv_by_const_no_add_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, 5
-; CHECK-NEXT: lui a1, 838861
-; CHECK-NEXT: addiw a1, a1, -819
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vmv.v.i v9, 5
+; CHECK-NEXT: lui a0, 838861
+; CHECK-NEXT: addi a0, a0, -819
+; CHECK-NEXT: vmulhu.vx v10, v8, a0, v0.t
; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t
; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
@@ -355,14 +334,16 @@ define <vscale x 2 x i32> @vpudiv_by_const_no_add_nxv2i32(<vscale x 2 x i32> %va
define <vscale x 1 x i64> @vpudiv_by_const_no_add_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpudiv_by_const_no_add_nxv1i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI19_0)
-; CHECK-NEXT: ld a1, %lo(.LCPI19_0)(a1)
-; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, 5
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vmv.v.i v9, 5
+; CHECK-NEXT: lui a0, 838861
+; CHECK-NEXT: vmseq.vi v9, v9, 1, v0.t
+; CHECK-NEXT: addiw a0, a0, -819
+; CHECK-NEXT: slli a1, a0, 32
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vmulhu.vx v10, v8, a0, v0.t
; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t
-; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t
+; CHECK-NEXT: vmv.v.v v0, v9
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT: ret
%vec = insertelement <vscale x 1 x i64> undef, i64 5, i32 0
@@ -374,13 +355,12 @@ define <vscale x 1 x i64> @vpudiv_by_const_no_add_nxv1i64(<vscale x 1 x i64> %va
define <vscale x 8 x i8> @vpudiv_by_const_with_add_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpudiv_by_const_with_add_nxv8i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, 7
-; CHECK-NEXT: li a1, 37
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t
-; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t
+; CHECK-NEXT: vmv.v.i v9, 7
+; CHECK-NEXT: li a0, 37
+; CHECK-NEXT: vmulhu.vx v10, v8, a0, v0.t
; CHECK-NEXT: li a0, -128
+; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t
; CHECK-NEXT: vmulhu.vx v11, v11, a0, v0.t
; CHECK-NEXT: vadd.vv v10, v11, v10, v0.t
; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t
@@ -396,14 +376,13 @@ define <vscale x 8 x i8> @vpudiv_by_const_with_add_nxv8i8(<vscale x 8 x i8> %va,
define <vscale x 4 x i16> @vpudiv_by_const_with_add_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpudiv_by_const_with_add_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, 7
-; CHECK-NEXT: lui a1, 2
-; CHECK-NEXT: addiw a1, a1, 1171
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t
-; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t
+; CHECK-NEXT: vmv.v.i v9, 7
+; CHECK-NEXT: lui a0, 2
+; CHECK-NEXT: addi a0, a0, 1171
+; CHECK-NEXT: vmulhu.vx v10, v8, a0, v0.t
; CHECK-NEXT: lui a0, 1048568
+; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t
; CHECK-NEXT: vmulhu.vx v11, v11, a0, v0.t
; CHECK-NEXT: vadd.vv v10, v11, v10, v0.t
; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t
@@ -419,14 +398,13 @@ define <vscale x 4 x i16> @vpudiv_by_const_with_add_nxv4i16(<vscale x 4 x i16> %
define <vscale x 2 x i32> @vpudiv_by_const_with_add_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpudiv_by_const_with_add_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, 7
-; CHECK-NEXT: lui a1, 149797
-; CHECK-NEXT: addiw a1, a1, -1755
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t
-; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t
+; CHECK-NEXT: vmv.v.i v9, 7
+; CHECK-NEXT: lui a0, 149797
+; CHECK-NEXT: addi a0, a0, -1755
+; CHECK-NEXT: vmulhu.vx v10, v8, a0, v0.t
; CHECK-NEXT: lui a0, 524288
+; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t
; CHECK-NEXT: vmulhu.vx v11, v11, a0, v0.t
; CHECK-NEXT: vadd.vv v10, v11, v10, v0.t
; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t
@@ -444,13 +422,12 @@ define <vscale x 1 x i64> @vpudiv_by_const_with_add_nxv1i64(<vscale x 1 x i64> %
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI23_0)
; CHECK-NEXT: ld a1, %lo(.LCPI23_0)(a1)
-; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, 7
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 7
; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t
-; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t
; CHECK-NEXT: li a0, -1
; CHECK-NEXT: slli a0, a0, 63
+; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t
; CHECK-NEXT: vmulhu.vx v11, v11, a0, v0.t
; CHECK-NEXT: vadd.vv v10, v11, v10, v0.t
; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t
@@ -517,9 +494,7 @@ define <vscale x 8 x i8> @vpsdiv_by_min_nxv8i8(<vscale x 8 x i8> %va, <vscale x
; CHECK-NEXT: li a1, -128
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT: vmseq.vx v0, v8, a1, v0.t
-; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: ret
%vec = insertelement <vscale x 8 x i8> undef, i8 -128, i32 0
@@ -535,9 +510,7 @@ define <vscale x 1 x i64> @vpsdiv_by_min_nxv1i64(<vscale x 1 x i64> %va, <vscale
; CHECK-NEXT: slli a1, a1, 63
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vmseq.vx v0, v8, a1, v0.t
-; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: ret
%vec = insertelement <vscale x 1 x i64> undef, i64 -9223372036854775808, i32 0
@@ -552,9 +525,7 @@ define <vscale x 4 x i16> @vpsdiv_by_min_nxv4i16(<vscale x 4 x i16> %va, <vscale
; CHECK-NEXT: lui a1, 1048568
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vmseq.vx v0, v8, a1, v0.t
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: ret
%vec = insertelement <vscale x 4 x i16> undef, i16 -32768, i32 0
@@ -569,9 +540,7 @@ define <vscale x 2 x i32> @vpsdiv_by_min_nxv2i32(<vscale x 2 x i32> %va, <vscale
; CHECK-NEXT: lui a1, 524288
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vmseq.vx v0, v8, a1, v0.t
-; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: ret
%vec = insertelement <vscale x 2 x i32> undef, i32 -2147483648, i32 0
@@ -583,10 +552,9 @@ define <vscale x 2 x i32> @vpsdiv_by_min_nxv2i32(<vscale x 2 x i32> %va, <vscale
define <vscale x 4 x i16> @vpsdiv_pow2_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsdiv_pow2_nxv4i16:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 4
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t
; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t
; CHECK-NEXT: vmor.mm v10, v10, v11
@@ -594,15 +562,12 @@ define <vscale x 4 x i16> @vpsdiv_pow2_nxv4i16(<vscale x 4 x i16> %va, <vscale x
; CHECK-NEXT: vsrl.vi v11, v11, 14, v0.t
; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t
; CHECK-NEXT: vsra.vi v11, v11, 2, v0.t
+; CHECK-NEXT: vmv.v.i v12, 0
; CHECK-NEXT: vmv.v.v v0, v10
; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v11, 0
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmsgt.vi v0, v11, 4, v0.t
+; CHECK-NEXT: vmsgt.vi v0, v12, 4, v0.t
; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: ret
%vec = insertelement <vscale x 4 x i16> undef, i16 4, i32 0
@@ -614,10 +579,9 @@ define <vscale x 4 x i16> @vpsdiv_pow2_nxv4i16(<vscale x 4 x i16> %va, <vscale x
define <vscale x 8 x i8> @vpsdiv_pow2_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsdiv_pow2_nxv8i8:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 4
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t
; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t
; CHECK-NEXT: vmor.mm v10, v10, v11
@@ -625,15 +589,12 @@ define <vscale x 8 x i8> @vpsdiv_pow2_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8
; CHECK-NEXT: vsrl.vi v11, v11, 6, v0.t
; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t
; CHECK-NEXT: vsra.vi v11, v11, 2, v0.t
+; CHECK-NEXT: vmv.v.i v12, 0
; CHECK-NEXT: vmv.v.v v0, v10
; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t
-; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v11, 0
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmsgt.vi v0, v11, 4, v0.t
+; CHECK-NEXT: vmsgt.vi v0, v12, 4, v0.t
; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: ret
%vec = insertelement <vscale x 8 x i8> undef, i8 4, i32 0
@@ -645,10 +606,9 @@ define <vscale x 8 x i8> @vpsdiv_pow2_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8
define <vscale x 2 x i32> @vpsdiv_pow2_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsdiv_pow2_nxv2i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 4
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t
; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t
; CHECK-NEXT: vmor.mm v10, v10, v11
@@ -656,15 +616,12 @@ define <vscale x 2 x i32> @vpsdiv_pow2_nxv2i32(<vscale x 2 x i32> %va, <vscale x
; CHECK-NEXT: vsrl.vi v11, v11, 30, v0.t
; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t
; CHECK-NEXT: vsra.vi v11, v11, 2, v0.t
+; CHECK-NEXT: vmv.v.i v12, 0
; CHECK-NEXT: vmv.v.v v0, v10
; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t
-; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v11, 0
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmsgt.vi v0, v11, 4, v0.t
+; CHECK-NEXT: vmsgt.vi v0, v12, 4, v0.t
; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: ret
%vec = insertelement <vscale x 2 x i32> undef, i32 4, i32 0
@@ -676,26 +633,24 @@ define <vscale x 2 x i32> @vpsdiv_pow2_nxv2i32(<vscale x 2 x i32> %va, <vscale x
define <vscale x 1 x i64> @vpsdiv_pow2_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsdiv_pow2_nxv1i64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 4
-; CHECK-NEXT: li a1, 63
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vsra.vx v11, v8, a1, v0.t
-; CHECK-NEXT: li a1, 62
-; CHECK-NEXT: vsrl.vx v11, v11, a1, v0.t
+; CHECK-NEXT: li a0, 63
+; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t
+; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t
+; CHECK-NEXT: vsra.vx v12, v8, a0, v0.t
+; CHECK-NEXT: li a0, 62
+; CHECK-NEXT: vmor.mm v10, v10, v11
+; CHECK-NEXT: vsrl.vx v11, v12, a0, v0.t
; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t
; CHECK-NEXT: vsra.vi v11, v11, 2, v0.t
-; CHECK-NEXT: vmseq.vi v12, v10, -1, v0.t
-; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t
-; CHECK-NEXT: vmor.mm v0, v10, v12
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vmv.v.v v0, v10
; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t
-; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v11, 0
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vmsgt.vi v0, v11, 4, v0.t
+; CHECK-NEXT: vmsgt.vi v0, v12, 4, v0.t
; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: ret
%vec = insertelement <vscale x 1 x i64> undef, i64 4, i32 0
@@ -725,7 +680,7 @@ define <vscale x 4 x i16> @vpsdiv_const_no_ashr_nxv4i16(<vscale x 4 x i16> %va,
; CHECK-LABEL: vpsdiv_const_no_ashr_nxv4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, 5
-; CHECK-NEXT: addiw a1, a1, 1366
+; CHECK-NEXT: addi a1, a1, 1366
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsra.vi v8, v8, 0, v0.t
@@ -743,7 +698,7 @@ define <vscale x 2 x i32> @vpsdiv_const_no_ashr_nxv2i32(<vscale x 2 x i32> %va,
; CHECK-LABEL: vpsdiv_const_no_ashr_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, 349525
-; CHECK-NEXT: addiw a1, a1, 1366
+; CHECK-NEXT: addi a1, a1, 1366
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsra.vi v8, v8, 0, v0.t
@@ -797,7 +752,7 @@ define <vscale x 4 x i16> @vpsdiv_const_ashr_nxv4i16(<vscale x 4 x i16> %va, <vs
; CHECK-LABEL: vpsdiv_const_ashr_nxv4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, 6
-; CHECK-NEXT: addiw a1, a1, 1639
+; CHECK-NEXT: addi a1, a1, 1639
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t
@@ -815,7 +770,7 @@ define <vscale x 2 x i32> @vpsdiv_const_ashr_nxv2i32(<vscale x 2 x i32> %va, <vs
; CHECK-LABEL: vpsdiv_const_ashr_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, 419430
-; CHECK-NEXT: addiw a1, a1, 1639
+; CHECK-NEXT: addi a1, a1, 1639
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t
@@ -870,7 +825,7 @@ define <vscale x 4 x i16> @vpsdiv_const_add_ashr_nxv4i16(<vscale x 4 x i16> %va,
; CHECK-LABEL: vpsdiv_const_add_ashr_nxv4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, 1048569
-; CHECK-NEXT: addiw a1, a1, -1911
+; CHECK-NEXT: addi a1, a1, -1911
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t
; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
@@ -889,7 +844,7 @@ define <vscale x 2 x i32> @vpsdiv_const_add_ashr_nxv2i32(<vscale x 2 x i32> %va,
; CHECK-LABEL: vpsdiv_const_add_ashr_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, 599186
-; CHECK-NEXT: addiw a1, a1, 1171
+; CHECK-NEXT: addi a1, a1, 1171
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t
; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
@@ -907,8 +862,10 @@ define <vscale x 2 x i32> @vpsdiv_const_add_ashr_nxv2i32(<vscale x 2 x i32> %va,
define <vscale x 1 x i64> @vpsdiv_const_add_ashr_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsdiv_const_add_ashr_nxv1i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI47_0)
-; CHECK-NEXT: ld a1, %lo(.LCPI47_0)(a1)
+; CHECK-NEXT: lui a1, 559241
+; CHECK-NEXT: addiw a1, a1, -1911
+; CHECK-NEXT: slli a2, a1, 32
+; CHECK-NEXT: add a1, a1, a2
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t
; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
@@ -946,7 +903,7 @@ define <vscale x 4 x i16> @vpsdiv_const_sub_ashr_nxv4i16(<vscale x 4 x i16> %va,
; CHECK-LABEL: vpsdiv_const_sub_ashr_nxv4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, 7
-; CHECK-NEXT: addiw a1, a1, 1911
+; CHECK-NEXT: addi a1, a1, 1911
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t
; CHECK-NEXT: vsub.vv v8, v9, v8, v0.t
@@ -965,7 +922,7 @@ define <vscale x 2 x i32> @vpsdiv_const_sub_ashr_nxv2i32(<vscale x 2 x i32> %va,
; CHECK-LABEL: vpsdiv_const_sub_ashr_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, 449390
-; CHECK-NEXT: addiw a1, a1, -1171
+; CHECK-NEXT: addi a1, a1, -1171
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t
; CHECK-NEXT: vsub.vv v8, v9, v8, v0.t
@@ -983,8 +940,10 @@ define <vscale x 2 x i32> @vpsdiv_const_sub_ashr_nxv2i32(<vscale x 2 x i32> %va,
define <vscale x 1 x i64> @vpsdiv_const_sub_ashr_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsdiv_const_sub_ashr_nxv1i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI51_0)
-; CHECK-NEXT: ld a1, %lo(.LCPI51_0)(a1)
+; CHECK-NEXT: lui a1, 349525
+; CHECK-NEXT: addiw a1, a1, 1365
+; CHECK-NEXT: slli a2, a1, 32
+; CHECK-NEXT: add a1, a1, a2
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t
; CHECK-NEXT: vsub.vv v8, v9, v8, v0.t
@@ -1055,15 +1014,17 @@ define <vscale x 2 x i32> @vpurem_by_max_nxv2i32(<vscale x 2 x i32> %va, <vscale
define <vscale x 1 x i64> @vpurem_by_const_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpurem_by_const_nxv1i64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: lui a1, %hi(.LCPI56_0)
-; CHECK-NEXT: ld a1, %lo(.LCPI56_0)(a1)
-; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 5
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vmulhu.vx v11, v8, a1, v0.t
+; CHECK-NEXT: lui a0, 838861
+; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t
+; CHECK-NEXT: addiw a0, a0, -819
+; CHECK-NEXT: slli a1, a0, 32
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vmulhu.vx v11, v8, a0, v0.t
; CHECK-NEXT: vsrl.vi v11, v11, 2, v0.t
-; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
; CHECK-NEXT: vmerge.vvm v10, v11, v8, v0
; CHECK-NEXT: li a0, 5
; CHECK-NEXT: vmv1r.v v0, v9
@@ -1079,17 +1040,16 @@ define <vscale x 1 x i64> @vpurem_by_const_nxv1i64(<vscale x 1 x i64> %va, <vsca
define <vscale x 4 x i16> @vpurem_by_const_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpurem_by_const_nxv4i16:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 5
-; CHECK-NEXT: lui a1, 1048573
-; CHECK-NEXT: addiw a1, a1, -819
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vmulhu.vx v11, v8, a1, v0.t
+; CHECK-NEXT: lui a0, 1048573
+; CHECK-NEXT: addi a0, a0, -819
+; CHECK-NEXT: vmulhu.vx v11, v8, a0, v0.t
; CHECK-NEXT: vsrl.vi v11, v11, 2, v0.t
; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t
-; CHECK-NEXT: vmerge.vvm v10, v11, v8, v0
; CHECK-NEXT: li a0, 5
+; CHECK-NEXT: vmerge.vvm v10, v11, v8, v0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vmul.vx v10, v10, a0, v0.t
; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
@@ -1103,16 +1063,15 @@ define <vscale x 4 x i16> @vpurem_by_const_nxv4i16(<vscale x 4 x i16> %va, <vsca
define <vscale x 8 x i8> @vpurem_by_const_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpurem_by_const_nxv8i8:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 5
-; CHECK-NEXT: li a1, -51
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vmulhu.vx v11, v8, a1, v0.t
+; CHECK-NEXT: li a0, -51
+; CHECK-NEXT: vmulhu.vx v11, v8, a0, v0.t
; CHECK-NEXT: vsrl.vi v11, v11, 2, v0.t
; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t
-; CHECK-NEXT: vmerge.vvm v10, v11, v8, v0
; CHECK-NEXT: li a0, 5
+; CHECK-NEXT: vmerge.vvm v10, v11, v8, v0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vmul.vx v10, v10, a0, v0.t
; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
@@ -1126,17 +1085,16 @@ define <vscale x 8 x i8> @vpurem_by_const_nxv8i8(<vscale x 8 x i8> %va, <vscale
define <vscale x 2 x i32> @vpurem_by_const_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpurem_by_const_nxv2i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 5
-; CHECK-NEXT: lui a1, 838861
-; CHECK-NEXT: addiw a1, a1, -819
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vmulhu.vx v11, v8, a1, v0.t
+; CHECK-NEXT: lui a0, 838861
+; CHECK-NEXT: addi a0, a0, -819
+; CHECK-NEXT: vmulhu.vx v11, v8, a0, v0.t
; CHECK-NEXT: vsrl.vi v11, v11, 2, v0.t
; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t
-; CHECK-NEXT: vmerge.vvm v10, v11, v8, v0
; CHECK-NEXT: li a0, 5
+; CHECK-NEXT: vmerge.vvm v10, v11, v8, v0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vmul.vx v10, v10, a0, v0.t
; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
@@ -1154,8 +1112,8 @@ define <vscale x 1 x i64> @vpsrem_by_const_nxv1i64(<vscale x 1 x i64> %va, <vsca
; CHECK-NEXT: ld a1, %lo(.LCPI60_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t
-; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t
; CHECK-NEXT: li a0, 63
+; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t
; CHECK-NEXT: vsrl.vx v10, v9, a0, v0.t
; CHECK-NEXT: vand.vi v10, v10, -1, v0.t
; CHECK-NEXT: vadd.vv v9, v9, v10, v0.t
@@ -1173,7 +1131,7 @@ define <vscale x 4 x i16> @vpsrem_by_const_nxv4i16(<vscale x 4 x i16> %va, <vsca
; CHECK-LABEL: vpsrem_by_const_nxv4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, 6
-; CHECK-NEXT: addiw a1, a1, 1639
+; CHECK-NEXT: addi a1, a1, 1639
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t
; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t
@@ -1214,7 +1172,7 @@ define <vscale x 2 x i32> @vpsrem_by_const_nxv2i32(<vscale x 2 x i32> %va, <vsca
; CHECK-LABEL: vpsrem_by_const_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, 419430
-; CHECK-NEXT: addiw a1, a1, 1639
+; CHECK-NEXT: addi a1, a1, 1639
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t
; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t
@@ -1234,6 +1192,23 @@ define <vscale x 2 x i32> @vpsrem_by_const_nxv2i32(<vscale x 2 x i32> %va, <vsca
define <vscale x 8 x i8> @vpudiv_by_1_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpudiv_by_1_nxv8i8:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv.v.i v10, 1
+; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t
+; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t
+; CHECK-NEXT: vmor.mm v10, v10, v11
+; CHECK-NEXT: vsra.vi v11, v8, 7, v0.t
+; CHECK-NEXT: vsrl.vi v11, v11, 8, v0.t
+; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t
+; CHECK-NEXT: vsra.vi v11, v11, 0, v0.t
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t
+; CHECK-NEXT: vmsgt.vi v0, v12, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: ret
%vec = insertelement <vscale x 8 x i8> undef, i8 1, i32 0
%splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -1244,6 +1219,23 @@ define <vscale x 8 x i8> @vpudiv_by_1_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8
define <vscale x 4 x i16> @vpudiv_by_1_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpudiv_by_1_nxv4i16:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv.v.i v10, 1
+; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t
+; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t
+; CHECK-NEXT: vmor.mm v10, v10, v11
+; CHECK-NEXT: vsra.vi v11, v8, 15, v0.t
+; CHECK-NEXT: vsrl.vi v11, v11, 16, v0.t
+; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t
+; CHECK-NEXT: vsra.vi v11, v11, 0, v0.t
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t
+; CHECK-NEXT: vmsgt.vi v0, v12, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: ret
%vec = insertelement <vscale x 4 x i16> undef, i16 1, i32 0
%splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -1254,6 +1246,24 @@ define <vscale x 4 x i16> @vpudiv_by_1_nxv4i16(<vscale x 4 x i16> %va, <vscale x
define <vscale x 2 x i32> @vpudiv_by_1_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpudiv_by_1_nxv2i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv.v.i v10, 1
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t
+; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t
+; CHECK-NEXT: vmor.mm v10, v10, v11
+; CHECK-NEXT: vsra.vi v11, v8, 31, v0.t
+; CHECK-NEXT: vsrl.vx v11, v11, a0, v0.t
+; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t
+; CHECK-NEXT: vsra.vi v11, v11, 0, v0.t
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t
+; CHECK-NEXT: vmsgt.vi v0, v12, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: ret
%vec = insertelement <vscale x 2 x i32> undef, i32 1, i32 0
%splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -1264,6 +1274,25 @@ define <vscale x 2 x i32> @vpudiv_by_1_nxv2i32(<vscale x 2 x i32> %va, <vscale x
define <vscale x 1 x i64> @vpudiv_by_1_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpudiv_by_1_nxv1i64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv.v.i v10, 1
+; CHECK-NEXT: li a0, 63
+; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t
+; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t
+; CHECK-NEXT: vsra.vx v12, v8, a0, v0.t
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vmor.mm v10, v10, v11
+; CHECK-NEXT: vsrl.vx v11, v12, a0, v0.t
+; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t
+; CHECK-NEXT: vsra.vi v11, v11, 0, v0.t
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t
+; CHECK-NEXT: vmsgt.vi v0, v12, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: ret
%vec = insertelement <vscale x 1 x i64> undef, i64 1, i32 0
%splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
@@ -1274,6 +1303,23 @@ define <vscale x 1 x i64> @vpudiv_by_1_nxv1i64(<vscale x 1 x i64> %va, <vscale x
define <vscale x 8 x i8> @vpsdiv_by_1_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsdiv_by_1_nxv8i8:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv.v.i v10, 1
+; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t
+; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t
+; CHECK-NEXT: vmor.mm v10, v10, v11
+; CHECK-NEXT: vsra.vi v11, v8, 7, v0.t
+; CHECK-NEXT: vsrl.vi v11, v11, 8, v0.t
+; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t
+; CHECK-NEXT: vsra.vi v11, v11, 0, v0.t
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t
+; CHECK-NEXT: vmsgt.vi v0, v12, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: ret
%vec = insertelement <vscale x 8 x i8> undef, i8 1, i32 0
%splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -1284,6 +1330,23 @@ define <vscale x 8 x i8> @vpsdiv_by_1_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8
define <vscale x 4 x i16> @vpsdiv_by_1_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsdiv_by_1_nxv4i16:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv.v.i v10, 1
+; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t
+; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t
+; CHECK-NEXT: vmor.mm v10, v10, v11
+; CHECK-NEXT: vsra.vi v11, v8, 15, v0.t
+; CHECK-NEXT: vsrl.vi v11, v11, 16, v0.t
+; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t
+; CHECK-NEXT: vsra.vi v11, v11, 0, v0.t
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t
+; CHECK-NEXT: vmsgt.vi v0, v12, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: ret
%vec = insertelement <vscale x 4 x i16> undef, i16 1, i32 0
%splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -1294,6 +1357,24 @@ define <vscale x 4 x i16> @vpsdiv_by_1_nxv4i16(<vscale x 4 x i16> %va, <vscale x
define <vscale x 2 x i32> @vpsdiv_by_1_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsdiv_by_1_nxv2i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv.v.i v10, 1
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t
+; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t
+; CHECK-NEXT: vmor.mm v10, v10, v11
+; CHECK-NEXT: vsra.vi v11, v8, 31, v0.t
+; CHECK-NEXT: vsrl.vx v11, v11, a0, v0.t
+; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t
+; CHECK-NEXT: vsra.vi v11, v11, 0, v0.t
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t
+; CHECK-NEXT: vmsgt.vi v0, v12, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: ret
%vec = insertelement <vscale x 2 x i32> undef, i32 1, i32 0
%splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -1304,6 +1385,25 @@ define <vscale x 2 x i32> @vpsdiv_by_1_nxv2i32(<vscale x 2 x i32> %va, <vscale x
define <vscale x 1 x i64> @vpsdiv_by_1_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsdiv_by_1_nxv1i64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv.v.i v10, 1
+; CHECK-NEXT: li a0, 63
+; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t
+; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t
+; CHECK-NEXT: vsra.vx v12, v8, a0, v0.t
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vmor.mm v10, v10, v11
+; CHECK-NEXT: vsrl.vx v11, v12, a0, v0.t
+; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t
+; CHECK-NEXT: vsra.vi v11, v11, 0, v0.t
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t
+; CHECK-NEXT: vmsgt.vi v0, v12, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: ret
%vec = insertelement <vscale x 1 x i64> undef, i64 1, i32 0
%splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
@@ -1314,8 +1414,9 @@ define <vscale x 1 x i64> @vpsdiv_by_1_nxv1i64(<vscale x 1 x i64> %va, <vscale x
define <vscale x 8 x i8> @vpurem_by_1_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpurem_by_1_nxv8i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
%vec = insertelement <vscale x 8 x i8> undef, i8 1, i32 0
%splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -1326,8 +1427,9 @@ define <vscale x 8 x i8> @vpurem_by_1_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8
define <vscale x 4 x i16> @vpurem_by_1_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpurem_by_1_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
%vec = insertelement <vscale x 4 x i16> undef, i16 1, i32 0
%splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -1338,8 +1440,9 @@ define <vscale x 4 x i16> @vpurem_by_1_nxv4i16(<vscale x 4 x i16> %va, <vscale x
define <vscale x 2 x i32> @vpurem_by_1_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpurem_by_1_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
%vec = insertelement <vscale x 2 x i32> undef, i32 1, i32 0
%splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -1350,8 +1453,9 @@ define <vscale x 2 x i32> @vpurem_by_1_nxv2i32(<vscale x 2 x i32> %va, <vscale x
define <vscale x 1 x i64> @vpurem_by_1_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpurem_by_1_nxv1i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vsrl.vi v9, v8, 0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
%vec = insertelement <vscale x 1 x i64> undef, i64 1, i32 0
%splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
@@ -1362,8 +1466,25 @@ define <vscale x 1 x i64> @vpurem_by_1_nxv1i64(<vscale x 1 x i64> %va, <vscale x
define <vscale x 8 x i8> @vpsrem_by_1_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsrem_by_1_nxv8i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv.v.i v10, 1
+; CHECK-NEXT: vmv.v.i v11, 0
+; CHECK-NEXT: vmseq.vi v12, v10, -1, v0.t
+; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t
+; CHECK-NEXT: vmor.mm v10, v10, v12
+; CHECK-NEXT: vsra.vi v12, v8, 7, v0.t
+; CHECK-NEXT: vsrl.vi v12, v12, 8, v0.t
+; CHECK-NEXT: vadd.vv v12, v8, v12, v0.t
+; CHECK-NEXT: vsra.vi v12, v12, 0, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: vmerge.vvm v10, v12, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vrsub.vi v12, v10, 0, v0.t
+; CHECK-NEXT: vmsgt.vi v0, v11, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v10, v10, v12, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT: ret
%vec = insertelement <vscale x 8 x i8> undef, i8 1, i32 0
%splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -1374,8 +1495,25 @@ define <vscale x 8 x i8> @vpsrem_by_1_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8
define <vscale x 4 x i16> @vpsrem_by_1_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsrem_by_1_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv.v.i v10, 1
+; CHECK-NEXT: vmv.v.i v11, 0
+; CHECK-NEXT: vmseq.vi v12, v10, -1, v0.t
+; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t
+; CHECK-NEXT: vmor.mm v10, v10, v12
+; CHECK-NEXT: vsra.vi v12, v8, 15, v0.t
+; CHECK-NEXT: vsrl.vi v12, v12, 16, v0.t
+; CHECK-NEXT: vadd.vv v12, v8, v12, v0.t
+; CHECK-NEXT: vsra.vi v12, v12, 0, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: vmerge.vvm v10, v12, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vrsub.vi v12, v10, 0, v0.t
+; CHECK-NEXT: vmsgt.vi v0, v11, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v10, v10, v12, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT: ret
%vec = insertelement <vscale x 4 x i16> undef, i16 1, i32 0
%splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -1386,8 +1524,26 @@ define <vscale x 4 x i16> @vpsrem_by_1_nxv4i16(<vscale x 4 x i16> %va, <vscale x
define <vscale x 2 x i32> @vpsrem_by_1_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsrem_by_1_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv.v.i v10, 1
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vmv.v.i v11, 0
+; CHECK-NEXT: vmseq.vi v12, v10, -1, v0.t
+; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t
+; CHECK-NEXT: vmor.mm v10, v10, v12
+; CHECK-NEXT: vsra.vi v12, v8, 31, v0.t
+; CHECK-NEXT: vsrl.vx v12, v12, a0, v0.t
+; CHECK-NEXT: vadd.vv v12, v8, v12, v0.t
+; CHECK-NEXT: vsra.vi v12, v12, 0, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: vmerge.vvm v10, v12, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vrsub.vi v12, v10, 0, v0.t
+; CHECK-NEXT: vmsgt.vi v0, v11, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v10, v10, v12, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT: ret
%vec = insertelement <vscale x 2 x i32> undef, i32 1, i32 0
%splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -1398,8 +1554,27 @@ define <vscale x 2 x i32> @vpsrem_by_1_nxv2i32(<vscale x 2 x i32> %va, <vscale x
define <vscale x 1 x i64> @vpsrem_by_1_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsrem_by_1_nxv1i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv.v.i v10, 1
+; CHECK-NEXT: li a0, 63
+; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t
+; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t
+; CHECK-NEXT: vsra.vx v12, v8, a0, v0.t
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vmv.v.i v13, 0
+; CHECK-NEXT: vmor.mm v10, v10, v11
+; CHECK-NEXT: vsrl.vx v11, v12, a0, v0.t
+; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t
+; CHECK-NEXT: vsra.vi v11, v11, 0, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: vmerge.vvm v10, v11, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vrsub.vi v11, v10, 0, v0.t
+; CHECK-NEXT: vmsgt.vi v0, v13, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v10, v10, v11, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT: ret
%vec = insertelement <vscale x 1 x i64> undef, i64 1, i32 0
%splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
@@ -1410,8 +1585,26 @@ define <vscale x 1 x i64> @vpsrem_by_1_nxv1i64(<vscale x 1 x i64> %va, <vscale x
define <vscale x 8 x i8> @vpsrem_by_neg1_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsrem_by_neg1_nxv8i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv.v.i v10, -1
+; CHECK-NEXT: vmv.v.i v11, 0
+; CHECK-NEXT: vmseq.vi v12, v10, -1, v0.t
+; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t
+; CHECK-NEXT: vmor.mm v10, v10, v12
+; CHECK-NEXT: vsra.vi v12, v8, 7, v0.t
+; CHECK-NEXT: vsrl.vi v12, v12, 8, v0.t
+; CHECK-NEXT: vadd.vv v12, v8, v12, v0.t
+; CHECK-NEXT: vsra.vi v12, v12, 0, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: vmerge.vvm v10, v12, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vrsub.vi v12, v10, 0, v0.t
+; CHECK-NEXT: vmsgt.vi v0, v11, -1, v0.t
+; CHECK-NEXT: vmerge.vvm v10, v10, v12, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vrsub.vi v10, v10, 0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT: ret
%vec = insertelement <vscale x 8 x i8> undef, i8 -1, i32 0
%splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -1422,8 +1615,26 @@ define <vscale x 8 x i8> @vpsrem_by_neg1_nxv8i8(<vscale x 8 x i8> %va, <vscale x
define <vscale x 4 x i16> @vpsrem_by_neg1_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsrem_by_neg1_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv.v.i v10, -1
+; CHECK-NEXT: vmv.v.i v11, 0
+; CHECK-NEXT: vmseq.vi v12, v10, -1, v0.t
+; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t
+; CHECK-NEXT: vmor.mm v10, v10, v12
+; CHECK-NEXT: vsra.vi v12, v8, 15, v0.t
+; CHECK-NEXT: vsrl.vi v12, v12, 16, v0.t
+; CHECK-NEXT: vadd.vv v12, v8, v12, v0.t
+; CHECK-NEXT: vsra.vi v12, v12, 0, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: vmerge.vvm v10, v12, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vrsub.vi v12, v10, 0, v0.t
+; CHECK-NEXT: vmsgt.vi v0, v11, -1, v0.t
+; CHECK-NEXT: vmerge.vvm v10, v10, v12, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vrsub.vi v10, v10, 0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT: ret
%vec = insertelement <vscale x 4 x i16> undef, i16 -1, i32 0
%splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -1434,8 +1645,27 @@ define <vscale x 4 x i16> @vpsrem_by_neg1_nxv4i16(<vscale x 4 x i16> %va, <vscal
define <vscale x 2 x i32> @vpsrem_by_neg1_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsrem_by_neg1_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv.v.i v10, -1
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vmv.v.i v11, 0
+; CHECK-NEXT: vmseq.vi v12, v10, -1, v0.t
+; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t
+; CHECK-NEXT: vmor.mm v10, v10, v12
+; CHECK-NEXT: vsra.vi v12, v8, 31, v0.t
+; CHECK-NEXT: vsrl.vx v12, v12, a0, v0.t
+; CHECK-NEXT: vadd.vv v12, v8, v12, v0.t
+; CHECK-NEXT: vsra.vi v12, v12, 0, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: vmerge.vvm v10, v12, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vrsub.vi v12, v10, 0, v0.t
+; CHECK-NEXT: vmsgt.vi v0, v11, -1, v0.t
+; CHECK-NEXT: vmerge.vvm v10, v10, v12, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vrsub.vi v10, v10, 0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT: ret
%vec = insertelement <vscale x 2 x i32> undef, i32 -1, i32 0
%splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -1446,8 +1676,28 @@ define <vscale x 2 x i32> @vpsrem_by_neg1_nxv2i32(<vscale x 2 x i32> %va, <vscal
define <vscale x 1 x i64> @vpsrem_by_neg1_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpsrem_by_neg1_nxv1i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv.v.i v10, -1
+; CHECK-NEXT: li a0, 63
+; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t
+; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t
+; CHECK-NEXT: vsra.vx v12, v8, a0, v0.t
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vmv.v.i v13, 0
+; CHECK-NEXT: vmor.mm v10, v10, v11
+; CHECK-NEXT: vsrl.vx v11, v12, a0, v0.t
+; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t
+; CHECK-NEXT: vsra.vi v11, v11, 0, v0.t
+; CHECK-NEXT: vmv.v.v v0, v10
+; CHECK-NEXT: vmerge.vvm v10, v11, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vrsub.vi v11, v10, 0, v0.t
+; CHECK-NEXT: vmsgt.vi v0, v13, -1, v0.t
+; CHECK-NEXT: vmerge.vvm v10, v10, v11, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vrsub.vi v10, v10, 0, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT: ret
%vec = insertelement <vscale x 1 x i64> undef, i64 -1, i32 0
%splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
@@ -1480,18 +1730,23 @@ define <vscale x 8 x i8> @vpsdivrem_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x
define <vscale x 8 x i8> @vpudivrem_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpudivrem_nxv8i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 37
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vmulhu.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv.v.i v10, 7
+; CHECK-NEXT: li a0, 37
+; CHECK-NEXT: vmulhu.vx v11, v8, a0, v0.t
; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t
-; CHECK-NEXT: vmulhu.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsub.vv v12, v8, v11, v0.t
+; CHECK-NEXT: vmulhu.vx v12, v12, a0, v0.t
+; CHECK-NEXT: vadd.vv v11, v12, v11, v0.t
+; CHECK-NEXT: vsrl.vi v11, v11, 2, v0.t
+; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t
; CHECK-NEXT: li a0, 7
-; CHECK-NEXT: vadd.vv v9, v10, v9, v0.t
-; CHECK-NEXT: vsrl.vi v9, v9, 2, v0.t
-; CHECK-NEXT: vmul.vx v10, v9, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vmerge.vvm v10, v11, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmul.vx v11, v10, a0, v0.t
+; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vsub.vv v8, v8, v11, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 8 x i8> @llvm.vp.urem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 7), <vscale x 8 x i1> %m, i32 %evl)
%w = call <vscale x 8 x i8> @llvm.vp.udiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 7), <vscale x 8 x i1> %m, i32 %evl)
>From 53b16ef51c0379812deeeead518ece3008143e29 Mon Sep 17 00:00:00 2001
From: Jesse Huang <jesse.huang at sifive.com>
Date: Wed, 19 Feb 2025 03:22:15 -0800
Subject: [PATCH 06/10] Address review comments
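Two review fixes in DAGCombiner::visitVPUDIVLike: the inline argument comment
now uses the /*ParamName=*/ form, and the multi-line body of the final `if`
gains braces. A minimal sketch of the resulting shape (context trimmed to the
affected statements, as they read after this patch):

    if (isConstantOrConstantVector(N1) &&
        !TLI.isIntDivCheap(N->getValueType(0), Attr)) {
      if (SDValue Op = BuildVPUDIV(N))
        return Op;
    }
    return SDValue();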
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 74ab35f8c5f05..5cd17a203dbf2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -27375,7 +27375,7 @@ SDValue DAGCombiner::visitVPUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
if (N1.getOpcode() == ISD::VP_SHL && N1->getOperand(2) == Mask &&
N1->getOperand(3) == VL) {
SDValue N10 = N1.getOperand(0);
- if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
+ if (isConstantOrConstantVector(N10, /*NoOpaques=*/ true) &&
DAG.isKnownToBeAPowerOfTwo(N10)) {
SDValue LogBase2 = BuildLogBase2(N10, DL);
AddToWorklist(LogBase2.getNode());
@@ -27416,9 +27416,10 @@ SDValue DAGCombiner::visitVPUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// fold (udiv x, c) -> alternate
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (isConstantOrConstantVector(N1) &&
- !TLI.isIntDivCheap(N->getValueType(0), Attr))
+ !TLI.isIntDivCheap(N->getValueType(0), Attr)) {
if (SDValue Op = BuildVPUDIV(N))
return Op;
+ }
return SDValue();
}
>From 127432cb1683513fa8cb6495520ad10d233e7e72 Mon Sep 17 00:00:00 2001
From: Jesse Huang <jesse.huang at sifive.com>
Date: Wed, 19 Feb 2025 03:25:37 -0800
Subject: [PATCH 07/10] Merge the VP_MULHU/VP_MULHS Expand and Custom handling
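This folds the separate Zve64* check into the existing MULHS/MULHU block in
the RISCVTargetLowering constructor: when the element type is i64 and only
Zve64* is available, VP_MULHU/VP_MULHS are marked Expand in the same if/else
that marks MULHS/MULHU Custom, rather than in a second check further down.
The merged logic, as it reads after this patch:

    // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
    if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV()) {
      setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);
    } else {
      setOperationAction({ISD::VP_MULHU, ISD::VP_MULHS}, VT, Expand);
    }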
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 72b6ba0c2d8ce..dc8603a5376cf 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1283,8 +1283,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Custom);
// vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
- if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
+ if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV()) {
setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);
+ } else {
+ setOperationAction({ISD::VP_MULHU, ISD::VP_MULHS}, VT, Expand);
+ }
setOperationAction({ISD::AVGFLOORS, ISD::AVGFLOORU, ISD::AVGCEILS,
ISD::AVGCEILU, ISD::SADDSAT, ISD::UADDSAT,
@@ -1305,11 +1308,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(IntegerVPOps, VT, Custom);
- // Zve64* does not support VP_MULHU/S with nxvXi64.
- if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV()) {
- setOperationAction({ISD::VP_MULHU, ISD::VP_MULHS}, VT, Expand);
- }
-
if (Subtarget.hasStdExtZvkb())
setOperationAction({ISD::BSWAP, ISD::ROTL, ISD::ROTR}, VT, Custom);
>From 2fd98d76fe3aed548edcd4669641c4a3419594a1 Mon Sep 17 00:00:00 2001
From: Jesse Huang <jesse.huang at sifive.com>
Date: Wed, 19 Feb 2025 10:50:20 -0800
Subject: [PATCH 08/10] Use splat constants and change undef to poison
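This is a test-only cleanup of vpdiv-by-const.ll and vpdiv-by-const-zve64.ll:
constant splats built with the insertelement/shufflevector idiom are replaced
by the splat shorthand, and the remaining non-constant splat patterns use
poison instead of undef as the insertelement base. For example (taken from
the updated tests below):

    ; before
    %vec = insertelement <vscale x 1 x i64> undef, i64 5, i32 0
    %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
    %v = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)

    ; after
    %v = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 5), <vscale x 1 x i1> %m, i32 %evl)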
---
.../CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll | 30 +-
llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll | 432 ++++++------------
2 files changed, 160 insertions(+), 302 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll
index 2fa4abb642270..d9bb93248c44e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll
@@ -15,9 +15,7 @@ define <vscale x 1 x i64> @vpudiv_by_const_no_add_nxv1i64(<vscale x 1 x i64> %va
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vdivu.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> undef, i64 5, i32 0
- %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %v = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 5), <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
@@ -28,9 +26,7 @@ define <vscale x 1 x i64> @vpudiv_by_const_with_add_nxv1i64(<vscale x 1 x i64> %
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vdivu.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> undef, i64 7, i32 0
- %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %v = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 7), <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
@@ -41,9 +37,7 @@ define <vscale x 1 x i64> @vpsdiv_const_no_ashr_nxv1i64(<vscale x 1 x i64> %va,
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vdiv.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> undef, i64 3, i32 0
- %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 3), <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
@@ -54,9 +48,7 @@ define <vscale x 1 x i64> @vpsdiv_const_ashr_nxv1i64(<vscale x 1 x i64> %va, <vs
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vdiv.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> undef, i64 5, i32 0
- %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 5), <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
@@ -67,9 +59,7 @@ define <vscale x 1 x i64> @vpsdiv_const_add_ashr_nxv1i64(<vscale x 1 x i64> %va,
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vdiv.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> undef, i64 15, i32 0
- %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 15), <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
@@ -80,7 +70,7 @@ define <vscale x 1 x i64> @vpsdiv_const_sub_ashr_nxv1i64(<vscale x 1 x i64> %va,
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vdiv.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> undef, i64 -3, i32 0
+ %vec = insertelement <vscale x 1 x i64> poison, i64 -3, i32 0
%splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
%v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
@@ -93,9 +83,7 @@ define <vscale x 1 x i64> @vpurem_by_const_nxv1i64(<vscale x 1 x i64> %va, <vsca
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vremu.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> undef, i64 5, i32 0
- %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %v = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 5), <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
@@ -106,8 +94,6 @@ define <vscale x 1 x i64> @vpsrem_by_const_nxv1i64(<vscale x 1 x i64> %va, <vsca
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vrem.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> undef, i64 5, i32 0
- %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %v = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 5), <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll
index b39fc392482cf..5ef604132a64c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll
@@ -31,9 +31,7 @@ define <vscale x 8 x i8> @vpudiv_by_max_nxv8i8(<vscale x 8 x i8> %va, <vscale x
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 8 x i8> undef, i8 255, i32 0
- %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
- %v = call <vscale x 8 x i8> @llvm.vp.udiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ %v = call <vscale x 8 x i8> @llvm.vp.udiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 255), <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i8> %v
}
@@ -45,9 +43,7 @@ define <vscale x 4 x i16> @vpudiv_by_max_nxv4i16(<vscale x 4 x i16> %va, <vscale
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 4 x i16> undef, i16 65535, i32 0
- %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x i16> @llvm.vp.udiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ %v = call <vscale x 4 x i16> @llvm.vp.udiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> splat (i16 65535), <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i16> %v
}
@@ -59,9 +55,7 @@ define <vscale x 2 x i32> @vpudiv_by_max_nxv2i32(<vscale x 2 x i32> %va, <vscale
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 2 x i32> undef, i32 4294967295, i32 0
- %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
- %v = call <vscale x 2 x i32> @llvm.vp.udiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ %v = call <vscale x 2 x i32> @llvm.vp.udiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> splat (i32 4294967295), <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i32> %v
}
@@ -73,9 +67,7 @@ define <vscale x 1 x i64> @vpudiv_by_max_nxv1i64(<vscale x 1 x i64> %va, <vscale
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> undef, i64 18446744073709551615, i32 0
- %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %v = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 18446744073709551615), <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
@@ -89,10 +81,8 @@ define <vscale x 8 x i8> @fold_vpudiv_vpurem_nxv8i8(<vscale x 8 x i8> %va, <vsca
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vadd.vv v8, v9, v8
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 8 x i8> undef, i8 128, i32 0
- %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
- %v = call <vscale x 8 x i8> @llvm.vp.udiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
- %u = call <vscale x 8 x i8> @llvm.vp.urem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ %v = call <vscale x 8 x i8> @llvm.vp.udiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 128), <vscale x 8 x i1> %m, i32 %evl)
+ %u = call <vscale x 8 x i8> @llvm.vp.urem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 128), <vscale x 8 x i1> %m, i32 %evl)
%x = add <vscale x 8 x i8> %v, %u
ret <vscale x 8 x i8> %x
}
@@ -107,10 +97,8 @@ define <vscale x 4 x i16> @fold_vpudiv_vpurem_nxv4i16(<vscale x 4 x i16> %va, <v
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vadd.vv v8, v9, v8
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 4 x i16> undef, i16 16384, i32 0
- %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x i16> @llvm.vp.udiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
- %u = call <vscale x 4 x i16> @llvm.vp.urem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ %v = call <vscale x 4 x i16> @llvm.vp.udiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> splat (i16 16384), <vscale x 4 x i1> %m, i32 %evl)
+ %u = call <vscale x 4 x i16> @llvm.vp.urem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> splat (i16 16384), <vscale x 4 x i1> %m, i32 %evl)
%x = add <vscale x 4 x i16> %v, %u
ret <vscale x 4 x i16> %x
}
@@ -125,10 +113,8 @@ define <vscale x 2 x i32> @fold_vpudiv_vpurem_nxv2i32(<vscale x 2 x i32> %va, <v
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vadd.vv v8, v9, v8
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 2 x i32> undef, i32 16384, i32 0
- %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
- %v = call <vscale x 2 x i32> @llvm.vp.udiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
- %u = call <vscale x 2 x i32> @llvm.vp.urem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ %v = call <vscale x 2 x i32> @llvm.vp.udiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> splat (i32 16384), <vscale x 2 x i1> %m, i32 %evl)
+ %u = call <vscale x 2 x i32> @llvm.vp.urem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> splat (i32 16384), <vscale x 2 x i1> %m, i32 %evl)
%x = add <vscale x 2 x i32> %v, %u
ret <vscale x 2 x i32> %x
}
@@ -143,10 +129,8 @@ define <vscale x 1 x i64> @fold_vpudiv_vpurem_nxv1i64(<vscale x 1 x i64> %va, <v
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vadd.vv v8, v9, v8
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> undef, i64 16384, i32 0
- %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
- %u = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %v = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 16384), <vscale x 1 x i1> %m, i32 %evl)
+ %u = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 16384), <vscale x 1 x i1> %m, i32 %evl)
%x = add <vscale x 1 x i64> %v, %u
ret <vscale x 1 x i64> %x
}
@@ -160,7 +144,7 @@ define <vscale x 8 x i8> @vpudiv_by_shl2_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <v
; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
%sh = shl i8 2, %b
- %vec = insertelement <vscale x 8 x i8> undef, i8 %sh, i32 0
+ %vec = insertelement <vscale x 8 x i8> poison, i8 %sh, i32 0
%splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
%v = call <vscale x 8 x i8> @llvm.vp.udiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i8> %v
@@ -176,7 +160,7 @@ define <vscale x 4 x i16> @vpudiv_by_shl2_nxv4i16(<vscale x 4 x i16> %va, i16 %b
; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
%sh = shl i16 2, %b
- %vec = insertelement <vscale x 4 x i16> undef, i16 %sh, i32 0
+ %vec = insertelement <vscale x 4 x i16> poison, i16 %sh, i32 0
%splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
%v = call <vscale x 4 x i16> @llvm.vp.udiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i16> %v
@@ -190,7 +174,7 @@ define <vscale x 2 x i32> @vpudiv_by_shl2_nxv2i32(<vscale x 2 x i32> %va, i32 %b
; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
%sh = shl i32 2, %b
- %vec = insertelement <vscale x 2 x i32> undef, i32 %sh, i32 0
+ %vec = insertelement <vscale x 2 x i32> poison, i32 %sh, i32 0
%splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
%v = call <vscale x 2 x i32> @llvm.vp.udiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i32> %v
@@ -204,7 +188,7 @@ define <vscale x 1 x i64> @vpudiv_by_shl2_nxv1i64(<vscale x 1 x i64> %va, i64 %b
; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
%sh = shl i64 2, %b
- %vec = insertelement <vscale x 1 x i64> undef, i64 %sh, i32 0
+ %vec = insertelement <vscale x 1 x i64> poison, i64 %sh, i32 0
%splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
%v = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
@@ -218,11 +202,9 @@ define <vscale x 8 x i8> @vpudiv_by_vpshl2_nxv8i8(<vscale x 8 x i8> %va, i8 %b,
; CHECK-NEXT: vadd.vi v9, v9, 2, v0.t
; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec1 = insertelement <vscale x 8 x i8> undef, i8 4, i32 0
- %splat1 = shufflevector <vscale x 8 x i8> %vec1, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
- %vec2 = insertelement <vscale x 8 x i8> undef, i8 %b, i32 0
+ %vec2 = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
%splat2 = shufflevector <vscale x 8 x i8> %vec2, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
- %sh = call <vscale x 8 x i8> @llvm.vp.shl.nxv8i8(<vscale x 8 x i8> %splat1, <vscale x 8 x i8> %splat2, <vscale x 8 x i1> %m, i32 %evl)
+ %sh = call <vscale x 8 x i8> @llvm.vp.shl.nxv8i8(<vscale x 8 x i8> splat (i8 4), <vscale x 8 x i8> %splat2, <vscale x 8 x i1> %m, i32 %evl)
%v = call <vscale x 8 x i8> @llvm.vp.udiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %sh, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i8> %v
}
@@ -235,11 +217,9 @@ define <vscale x 4 x i16> @vpudiv_by_vpshl2_nxv4i16(<vscale x 4 x i16> %va, i16
; CHECK-NEXT: vadd.vi v9, v9, 2, v0.t
; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec1 = insertelement <vscale x 4 x i16> undef, i16 4, i32 0
- %splat1 = shufflevector <vscale x 4 x i16> %vec1, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
- %vec2 = insertelement <vscale x 4 x i16> undef, i16 %b, i32 0
+ %vec2 = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
%splat2 = shufflevector <vscale x 4 x i16> %vec2, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
- %sh = call <vscale x 4 x i16> @llvm.vp.shl.nxv4i16(<vscale x 4 x i16> %splat1, <vscale x 4 x i16> %splat2, <vscale x 4 x i1> %m, i32 %evl)
+ %sh = call <vscale x 4 x i16> @llvm.vp.shl.nxv4i16(<vscale x 4 x i16> splat (i16 4), <vscale x 4 x i16> %splat2, <vscale x 4 x i1> %m, i32 %evl)
%v = call <vscale x 4 x i16> @llvm.vp.udiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %sh, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i16> %v
}
@@ -252,11 +232,9 @@ define <vscale x 2 x i32> @vpudiv_by_vpshl2_nxv2i32(<vscale x 2 x i32> %va, i32
; CHECK-NEXT: vadd.vi v9, v9, 2, v0.t
; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec1 = insertelement <vscale x 2 x i32> undef, i32 4, i32 0
- %splat1 = shufflevector <vscale x 2 x i32> %vec1, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
- %vec2 = insertelement <vscale x 2 x i32> undef, i32 %b, i32 0
+ %vec2 = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
%splat2 = shufflevector <vscale x 2 x i32> %vec2, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
- %sh = call <vscale x 2 x i32> @llvm.vp.shl.nxv2i32(<vscale x 2 x i32> %splat1, <vscale x 2 x i32> %splat2, <vscale x 2 x i1> %m, i32 %evl)
+ %sh = call <vscale x 2 x i32> @llvm.vp.shl.nxv2i32(<vscale x 2 x i32> splat (i32 4), <vscale x 2 x i32> %splat2, <vscale x 2 x i1> %m, i32 %evl)
%v = call <vscale x 2 x i32> @llvm.vp.udiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %sh, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i32> %v
}
@@ -269,11 +247,9 @@ define <vscale x 1 x i64> @vpudiv_by_vpshl2_nxv1i64(<vscale x 1 x i64> %va, i64
; CHECK-NEXT: vadd.vi v9, v9, 2, v0.t
; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec1 = insertelement <vscale x 1 x i64> undef, i64 4, i32 0
- %splat1 = shufflevector <vscale x 1 x i64> %vec1, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %vec2 = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
+ %vec2 = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
%splat2 = shufflevector <vscale x 1 x i64> %vec2, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %sh = call <vscale x 1 x i64> @llvm.vp.shl.nxv1i64(<vscale x 1 x i64> %splat1, <vscale x 1 x i64> %splat2, <vscale x 1 x i1> %m, i32 %evl)
+ %sh = call <vscale x 1 x i64> @llvm.vp.shl.nxv1i64(<vscale x 1 x i64> splat (i64 4), <vscale x 1 x i64> %splat2, <vscale x 1 x i1> %m, i32 %evl)
%v = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %sh, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
@@ -281,53 +257,47 @@ define <vscale x 1 x i64> @vpudiv_by_vpshl2_nxv1i64(<vscale x 1 x i64> %va, i64
define <vscale x 8 x i8> @vpudiv_by_const_no_add_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpudiv_by_const_no_add_nxv8i8:
; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -51
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, 5
-; CHECK-NEXT: li a0, -51
-; CHECK-NEXT: vmulhu.vx v10, v8, a0, v0.t
-; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t
-; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t
-; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT: vmulhu.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vsrl.vi v9, v9, 2, v0.t
+; CHECK-NEXT: vmv.v.i v10, 5
+; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 8 x i8> undef, i8 5, i32 0
- %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
- %v = call <vscale x 8 x i8> @llvm.vp.udiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ %v = call <vscale x 8 x i8> @llvm.vp.udiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 5), <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i8> %v
}
define <vscale x 4 x i16> @vpudiv_by_const_no_add_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpudiv_by_const_no_add_nxv4i16:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, 1048573
+; CHECK-NEXT: addi a1, a1, -819
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, 5
-; CHECK-NEXT: lui a0, 1048573
-; CHECK-NEXT: addi a0, a0, -819
-; CHECK-NEXT: vmulhu.vx v10, v8, a0, v0.t
-; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t
-; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t
-; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT: vmulhu.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vsrl.vi v9, v9, 2, v0.t
+; CHECK-NEXT: vmv.v.i v10, 5
+; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 4 x i16> undef, i16 5, i32 0
- %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x i16> @llvm.vp.udiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ %v = call <vscale x 4 x i16> @llvm.vp.udiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> splat (i16 5), <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i16> %v
}
define <vscale x 2 x i32> @vpudiv_by_const_no_add_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpudiv_by_const_no_add_nxv2i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, 838861
+; CHECK-NEXT: addi a1, a1, -819
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, 5
-; CHECK-NEXT: lui a0, 838861
-; CHECK-NEXT: addi a0, a0, -819
-; CHECK-NEXT: vmulhu.vx v10, v8, a0, v0.t
-; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t
-; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t
-; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT: vmulhu.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vsrl.vi v9, v9, 2, v0.t
+; CHECK-NEXT: vmv.v.i v10, 5
+; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 2 x i32> undef, i32 5, i32 0
- %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
- %v = call <vscale x 2 x i32> @llvm.vp.udiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ %v = call <vscale x 2 x i32> @llvm.vp.udiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> splat (i32 5), <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i32> %v
}
@@ -346,74 +316,66 @@ define <vscale x 1 x i64> @vpudiv_by_const_no_add_nxv1i64(<vscale x 1 x i64> %va
; CHECK-NEXT: vmv.v.v v0, v9
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> undef, i64 5, i32 0
- %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %v = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 5), <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
define <vscale x 8 x i8> @vpudiv_by_const_with_add_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpudiv_by_const_with_add_nxv8i8:
; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 37
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, 7
-; CHECK-NEXT: li a0, 37
-; CHECK-NEXT: vmulhu.vx v10, v8, a0, v0.t
+; CHECK-NEXT: vmulhu.vx v9, v8, a1, v0.t
; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t
-; CHECK-NEXT: vmulhu.vx v11, v11, a0, v0.t
-; CHECK-NEXT: vadd.vv v10, v11, v10, v0.t
-; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t
-; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t
-; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t
+; CHECK-NEXT: vmulhu.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vadd.vv v9, v10, v9, v0.t
+; CHECK-NEXT: vmv.v.i v10, 7
+; CHECK-NEXT: vsrl.vi v9, v9, 2, v0.t
+; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 8 x i8> undef, i8 7, i32 0
- %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
- %v = call <vscale x 8 x i8> @llvm.vp.udiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ %v = call <vscale x 8 x i8> @llvm.vp.udiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 7), <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i8> %v
}
define <vscale x 4 x i16> @vpudiv_by_const_with_add_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpudiv_by_const_with_add_nxv4i16:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, 2
+; CHECK-NEXT: addi a1, a1, 1171
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, 7
-; CHECK-NEXT: lui a0, 2
-; CHECK-NEXT: addi a0, a0, 1171
-; CHECK-NEXT: vmulhu.vx v10, v8, a0, v0.t
+; CHECK-NEXT: vmulhu.vx v9, v8, a1, v0.t
; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t
-; CHECK-NEXT: vmulhu.vx v11, v11, a0, v0.t
-; CHECK-NEXT: vadd.vv v10, v11, v10, v0.t
-; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t
-; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t
-; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t
+; CHECK-NEXT: vmulhu.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vadd.vv v9, v10, v9, v0.t
+; CHECK-NEXT: vmv.v.i v10, 7
+; CHECK-NEXT: vsrl.vi v9, v9, 2, v0.t
+; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 4 x i16> undef, i16 7, i32 0
- %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x i16> @llvm.vp.udiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ %v = call <vscale x 4 x i16> @llvm.vp.udiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> splat (i16 7), <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i16> %v
}
define <vscale x 2 x i32> @vpudiv_by_const_with_add_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpudiv_by_const_with_add_nxv2i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, 149797
+; CHECK-NEXT: addi a1, a1, -1755
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, 7
-; CHECK-NEXT: lui a0, 149797
-; CHECK-NEXT: addi a0, a0, -1755
-; CHECK-NEXT: vmulhu.vx v10, v8, a0, v0.t
+; CHECK-NEXT: vmulhu.vx v9, v8, a1, v0.t
; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t
-; CHECK-NEXT: vmulhu.vx v11, v11, a0, v0.t
-; CHECK-NEXT: vadd.vv v10, v11, v10, v0.t
-; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t
-; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t
-; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t
+; CHECK-NEXT: vmulhu.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vadd.vv v9, v10, v9, v0.t
+; CHECK-NEXT: vmv.v.i v10, 7
+; CHECK-NEXT: vsrl.vi v9, v9, 2, v0.t
+; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 2 x i32> undef, i32 7, i32 0
- %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
- %v = call <vscale x 2 x i32> @llvm.vp.udiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ %v = call <vscale x 2 x i32> @llvm.vp.udiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> splat (i32 7), <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i32> %v
}
@@ -423,20 +385,18 @@ define <vscale x 1 x i64> @vpudiv_by_const_with_add_nxv1i64(<vscale x 1 x i64> %
; CHECK-NEXT: lui a1, %hi(.LCPI23_0)
; CHECK-NEXT: ld a1, %lo(.LCPI23_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, 7
-; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vmulhu.vx v9, v8, a1, v0.t
; CHECK-NEXT: li a0, -1
; CHECK-NEXT: slli a0, a0, 63
-; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t
-; CHECK-NEXT: vmulhu.vx v11, v11, a0, v0.t
-; CHECK-NEXT: vadd.vv v10, v11, v10, v0.t
-; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t
-; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t
-; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t
+; CHECK-NEXT: vmulhu.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vadd.vv v9, v10, v9, v0.t
+; CHECK-NEXT: vmv.v.i v10, 7
+; CHECK-NEXT: vsrl.vi v9, v9, 2, v0.t
+; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> undef, i64 7, i32 0
- %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %v = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 7), <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
@@ -446,7 +406,7 @@ define <vscale x 8 x i8> @vpsdiv_by_neg1_nxv8i8(<vscale x 8 x i8> %va, <vscale x
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT: vrsub.vi v8, v8, 0, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 8 x i8> undef, i8 -1, i32 0
+ %vec = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0
%splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
%v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i8> %v
@@ -458,7 +418,7 @@ define <vscale x 1 x i64> @vpsdiv_by_neg1_nxv1i64(<vscale x 1 x i64> %va, <vscal
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vrsub.vi v8, v8, 0, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> undef, i64 -1, i32 0
+ %vec = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0
%splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
%v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
@@ -470,7 +430,7 @@ define <vscale x 4 x i16> @vpsdiv_by_neg1_nxv4i16(<vscale x 4 x i16> %va, <vscal
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vrsub.vi v8, v8, 0, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 4 x i16> undef, i16 -1, i32 0
+ %vec = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0
%splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
%v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i16> %v
@@ -482,7 +442,7 @@ define <vscale x 2 x i32> @vpsdiv_by_neg1_nxv2i32(<vscale x 2 x i32> %va, <vscal
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vrsub.vi v8, v8, 0, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 2 x i32> undef, i32 -1, i32 0
+ %vec = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0
%splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
%v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i32> %v
@@ -497,7 +457,7 @@ define <vscale x 8 x i8> @vpsdiv_by_min_nxv8i8(<vscale x 8 x i8> %va, <vscale x
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 8 x i8> undef, i8 -128, i32 0
+ %vec = insertelement <vscale x 8 x i8> poison, i8 -128, i32 0
%splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
%v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i8> %v
@@ -513,7 +473,7 @@ define <vscale x 1 x i64> @vpsdiv_by_min_nxv1i64(<vscale x 1 x i64> %va, <vscale
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> undef, i64 -9223372036854775808, i32 0
+ %vec = insertelement <vscale x 1 x i64> poison, i64 -9223372036854775808, i32 0
%splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
%v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
@@ -528,7 +488,7 @@ define <vscale x 4 x i16> @vpsdiv_by_min_nxv4i16(<vscale x 4 x i16> %va, <vscale
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 4 x i16> undef, i16 -32768, i32 0
+ %vec = insertelement <vscale x 4 x i16> poison, i16 -32768, i32 0
%splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
%v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i16> %v
@@ -543,7 +503,7 @@ define <vscale x 2 x i32> @vpsdiv_by_min_nxv2i32(<vscale x 2 x i32> %va, <vscale
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 2 x i32> undef, i32 -2147483648, i32 0
+ %vec = insertelement <vscale x 2 x i32> poison, i32 -2147483648, i32 0
%splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
%v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i32> %v
@@ -570,9 +530,7 @@ define <vscale x 4 x i16> @vpsdiv_pow2_nxv4i16(<vscale x 4 x i16> %va, <vscale x
; CHECK-NEXT: vmsgt.vi v0, v12, 4, v0.t
; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 4 x i16> undef, i16 4, i32 0
- %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> splat (i16 4), <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i16> %v
}
@@ -597,9 +555,7 @@ define <vscale x 8 x i8> @vpsdiv_pow2_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8
; CHECK-NEXT: vmsgt.vi v0, v12, 4, v0.t
; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 8 x i8> undef, i8 4, i32 0
- %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
- %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 4), <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i8> %v
}
@@ -624,9 +580,7 @@ define <vscale x 2 x i32> @vpsdiv_pow2_nxv2i32(<vscale x 2 x i32> %va, <vscale x
; CHECK-NEXT: vmsgt.vi v0, v12, 4, v0.t
; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 2 x i32> undef, i32 4, i32 0
- %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
- %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> splat (i32 4), <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i32> %v
}
@@ -653,9 +607,7 @@ define <vscale x 1 x i64> @vpsdiv_pow2_nxv1i64(<vscale x 1 x i64> %va, <vscale x
; CHECK-NEXT: vmsgt.vi v0, v12, 4, v0.t
; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> undef, i64 4, i32 0
- %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 4), <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
@@ -670,9 +622,7 @@ define <vscale x 8 x i8> @vpsdiv_const_no_ashr_nxv8i8(<vscale x 8 x i8> %va, <vs
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 8 x i8> undef, i8 3, i32 0
- %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
- %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 3), <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i8> %v
}
@@ -688,9 +638,7 @@ define <vscale x 4 x i16> @vpsdiv_const_no_ashr_nxv4i16(<vscale x 4 x i16> %va,
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 4 x i16> undef, i16 3, i32 0
- %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> splat (i16 3), <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i16> %v
}
@@ -706,9 +654,7 @@ define <vscale x 2 x i32> @vpsdiv_const_no_ashr_nxv2i32(<vscale x 2 x i32> %va,
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 2 x i32> undef, i32 3, i32 0
- %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
- %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> splat (i32 3), <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i32> %v
}
@@ -725,9 +671,7 @@ define <vscale x 1 x i64> @vpsdiv_const_no_ashr_nxv1i64(<vscale x 1 x i64> %va,
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> undef, i64 3, i32 0
- %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 3), <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
@@ -742,9 +686,7 @@ define <vscale x 8 x i8> @vpsdiv_const_ashr_nxv8i8(<vscale x 8 x i8> %va, <vscal
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 8 x i8> undef, i8 5, i32 0
- %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
- %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 5), <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i8> %v
}
@@ -760,9 +702,7 @@ define <vscale x 4 x i16> @vpsdiv_const_ashr_nxv4i16(<vscale x 4 x i16> %va, <vs
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 4 x i16> undef, i16 5, i32 0
- %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> splat (i16 5), <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i16> %v
}
@@ -778,9 +718,7 @@ define <vscale x 2 x i32> @vpsdiv_const_ashr_nxv2i32(<vscale x 2 x i32> %va, <vs
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 2 x i32> undef, i32 5, i32 0
- %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
- %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> splat (i32 5), <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i32> %v
}
@@ -797,9 +735,7 @@ define <vscale x 1 x i64> @vpsdiv_const_ashr_nxv1i64(<vscale x 1 x i64> %va, <vs
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> undef, i64 5, i32 0
- %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 5), <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
@@ -815,9 +751,7 @@ define <vscale x 8 x i8> @vpsdiv_const_add_ashr_nxv8i8(<vscale x 8 x i8> %va, <v
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 8 x i8> undef, i8 7, i32 0
- %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
- %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 7), <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i8> %v
}
@@ -834,9 +768,7 @@ define <vscale x 4 x i16> @vpsdiv_const_add_ashr_nxv4i16(<vscale x 4 x i16> %va,
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 4 x i16> undef, i16 15, i32 0
- %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> splat (i16 15), <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i16> %v
}
@@ -853,9 +785,7 @@ define <vscale x 2 x i32> @vpsdiv_const_add_ashr_nxv2i32(<vscale x 2 x i32> %va,
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 2 x i32> undef, i32 7, i32 0
- %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
- %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> splat (i32 7), <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i32> %v
}
@@ -875,9 +805,7 @@ define <vscale x 1 x i64> @vpsdiv_const_add_ashr_nxv1i64(<vscale x 1 x i64> %va,
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> undef, i64 15, i32 0
- %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 15), <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
@@ -893,7 +821,7 @@ define <vscale x 8 x i8> @vpsdiv_const_sub_ashr_nxv8i8(<vscale x 8 x i8> %va, <v
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 8 x i8> undef, i8 -7, i32 0
+ %vec = insertelement <vscale x 8 x i8> poison, i8 -7, i32 0
%splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
%v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i8> %v
@@ -912,7 +840,7 @@ define <vscale x 4 x i16> @vpsdiv_const_sub_ashr_nxv4i16(<vscale x 4 x i16> %va,
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 4 x i16> undef, i16 -15, i32 0
+ %vec = insertelement <vscale x 4 x i16> poison, i16 -15, i32 0
%splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
%v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i16> %v
@@ -931,7 +859,7 @@ define <vscale x 2 x i32> @vpsdiv_const_sub_ashr_nxv2i32(<vscale x 2 x i32> %va,
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 2 x i32> undef, i32 -7, i32 0
+ %vec = insertelement <vscale x 2 x i32> poison, i32 -7, i32 0
%splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
%v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i32> %v
@@ -953,7 +881,7 @@ define <vscale x 1 x i64> @vpsdiv_const_sub_ashr_nxv1i64(<vscale x 1 x i64> %va,
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> undef, i64 -3, i32 0
+ %vec = insertelement <vscale x 1 x i64> poison, i64 -3, i32 0
%splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
%v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
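[Reviewer note, not part of the patch: the sub_ashr tests above exercise negative divisors. For truncating division, x / -d == -(x / d), so a negative-constant divide can reuse the positive-magnitude sequence with a final negation/subtract; a tiny C++ check of that identity (avoiding INT_MIN, where negation would overflow):
  #include <cassert>
  #include <cstdint>
  int main() {
    for (int32_t x : {-10, -7, -3, -1, 0, 1, 3, 7, 10})
      assert(x / -3 == -(x / 3));  // truncating division negates cleanly
  }
]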
@@ -966,9 +894,7 @@ define <vscale x 1 x i64> @vpurem_by_max_nxv1i64(<vscale x 1 x i64> %va, <vscale
; CHECK-NEXT: vmseq.vi v0, v8, -1, v0.t
; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> undef, i64 18446744073709551615, i32 0
- %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %v = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 18446744073709551615), <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
@@ -979,9 +905,7 @@ define <vscale x 4 x i16> @vpurem_by_max_nxv4i16(<vscale x 4 x i16> %va, <vscale
; CHECK-NEXT: vmseq.vi v0, v8, -1, v0.t
; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 4 x i16> undef, i16 65535, i32 0
- %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x i16> @llvm.vp.urem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ %v = call <vscale x 4 x i16> @llvm.vp.urem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> splat (i16 65535), <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i16> %v
}
@@ -992,9 +916,7 @@ define <vscale x 8 x i8> @vpurem_by_max_nxv8i8(<vscale x 8 x i8> %va, <vscale x
; CHECK-NEXT: vmseq.vi v0, v8, -1, v0.t
; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 8 x i8> undef, i8 255, i32 0
- %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
- %v = call <vscale x 8 x i8> @llvm.vp.urem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ %v = call <vscale x 8 x i8> @llvm.vp.urem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 255), <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i8> %v
}
@@ -1005,9 +927,7 @@ define <vscale x 2 x i32> @vpurem_by_max_nxv2i32(<vscale x 2 x i32> %va, <vscale
; CHECK-NEXT: vmseq.vi v0, v8, -1, v0.t
; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 2 x i32> undef, i32 4294967295, i32 0
- %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
- %v = call <vscale x 2 x i32> @llvm.vp.urem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ %v = call <vscale x 2 x i32> @llvm.vp.urem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> splat (i32 4294967295), <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i32> %v
}
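[Reviewer note, not part of the patch: the urem-by-max tests reduce x % UINT_MAX to a compare-and-select — the remainder is x unless x equals UINT_MAX, in which case it is 0 — which is exactly the vmseq.vi/vmerge.vim pair in the CHECK lines above. A scalar C++ model of the same identity at 32 bits:
  #include <cassert>
  #include <cstdint>
  int main() {
    for (uint64_t x : {0ull, 1ull, 0xFFFFFFFEull, 0xFFFFFFFFull})
      assert((uint32_t)x % UINT32_MAX ==
             ((uint32_t)x == UINT32_MAX ? 0u : (uint32_t)x));
  }
]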
@@ -1031,9 +951,7 @@ define <vscale x 1 x i64> @vpurem_by_const_nxv1i64(<vscale x 1 x i64> %va, <vsca
; CHECK-NEXT: vmul.vx v10, v10, a0, v0.t
; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> undef, i64 5, i32 0
- %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %v = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 5), <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
@@ -1042,8 +960,8 @@ define <vscale x 4 x i16> @vpurem_by_const_nxv4i16(<vscale x 4 x i16> %va, <vsca
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vmv.v.i v10, 5
; CHECK-NEXT: lui a0, 1048573
+; CHECK-NEXT: vmv.v.i v10, 5
; CHECK-NEXT: addi a0, a0, -819
; CHECK-NEXT: vmulhu.vx v11, v8, a0, v0.t
; CHECK-NEXT: vsrl.vi v11, v11, 2, v0.t
@@ -1054,9 +972,7 @@ define <vscale x 4 x i16> @vpurem_by_const_nxv4i16(<vscale x 4 x i16> %va, <vsca
; CHECK-NEXT: vmul.vx v10, v10, a0, v0.t
; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 4 x i16> undef, i16 5, i32 0
- %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x i16> @llvm.vp.urem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ %v = call <vscale x 4 x i16> @llvm.vp.urem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> splat (i16 5), <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i16> %v
}
@@ -1065,8 +981,8 @@ define <vscale x 8 x i8> @vpurem_by_const_nxv8i8(<vscale x 8 x i8> %va, <vscale
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vmv.v.i v10, 5
; CHECK-NEXT: li a0, -51
+; CHECK-NEXT: vmv.v.i v10, 5
; CHECK-NEXT: vmulhu.vx v11, v8, a0, v0.t
; CHECK-NEXT: vsrl.vi v11, v11, 2, v0.t
; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t
@@ -1076,9 +992,7 @@ define <vscale x 8 x i8> @vpurem_by_const_nxv8i8(<vscale x 8 x i8> %va, <vscale
; CHECK-NEXT: vmul.vx v10, v10, a0, v0.t
; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 8 x i8> undef, i8 5, i32 0
- %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
- %v = call <vscale x 8 x i8> @llvm.vp.urem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ %v = call <vscale x 8 x i8> @llvm.vp.urem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 5), <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i8> %v
}
@@ -1087,8 +1001,8 @@ define <vscale x 2 x i32> @vpurem_by_const_nxv2i32(<vscale x 2 x i32> %va, <vsca
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vmv.v.i v10, 5
; CHECK-NEXT: lui a0, 838861
+; CHECK-NEXT: vmv.v.i v10, 5
; CHECK-NEXT: addi a0, a0, -819
; CHECK-NEXT: vmulhu.vx v11, v8, a0, v0.t
; CHECK-NEXT: vsrl.vi v11, v11, 2, v0.t
@@ -1099,9 +1013,7 @@ define <vscale x 2 x i32> @vpurem_by_const_nxv2i32(<vscale x 2 x i32> %va, <vsca
; CHECK-NEXT: vmul.vx v10, v10, a0, v0.t
; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 2 x i32> undef, i32 5, i32 0
- %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
- %v = call <vscale x 2 x i32> @llvm.vp.urem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ %v = call <vscale x 2 x i32> @llvm.vp.urem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> splat (i32 5), <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i32> %v
}
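[Reviewer note, not part of the patch: the nxv2i32 expansion above materializes 0xCCCCCCCD via "lui a0, 838861; addi a0, a0, -819" (838861 << 12 - 819 = 0xCCCCCCCD = ceil(2^34 / 5)), computes q = mulhu(x, magic) >> 2, then r = x - 5q. A scalar C++ model of that sequence, checked on a sparse sweep of the i32 range:
  #include <cassert>
  #include <cstdint>
  uint32_t urem5(uint32_t x) {
    // q = high 32 bits of x * 0xCCCCCCCD, shifted right by 2.
    uint32_t q = (uint32_t)(((uint64_t)x * 0xCCCCCCCDu) >> 32) >> 2;
    return x - q * 5;  // r = x - 5 * (x / 5)
  }
  int main() {
    for (uint64_t x = 0; x <= 0xFFFFFFFFull; x += 0x10001)
      assert(urem5((uint32_t)x) == (uint32_t)x % 5);
  }
]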
@@ -1121,9 +1033,7 @@ define <vscale x 1 x i64> @vpsrem_by_const_nxv1i64(<vscale x 1 x i64> %va, <vsca
; CHECK-NEXT: vmul.vx v9, v9, a0, v0.t
; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> undef, i64 5, i32 0
- %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %v = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 5), <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
@@ -1142,9 +1052,7 @@ define <vscale x 4 x i16> @vpsrem_by_const_nxv4i16(<vscale x 4 x i16> %va, <vsca
; CHECK-NEXT: vmul.vx v9, v9, a0, v0.t
; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 4 x i16> undef, i16 5, i32 0
- %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x i16> @llvm.vp.srem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ %v = call <vscale x 4 x i16> @llvm.vp.srem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> splat (i16 5), <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i16> %v
}
@@ -1162,9 +1070,7 @@ define <vscale x 8 x i8> @vpsrem_by_const_nxv8i8(<vscale x 8 x i8> %va, <vscale
; CHECK-NEXT: vmul.vx v9, v9, a0, v0.t
; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 8 x i8> undef, i8 5, i32 0
- %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
- %v = call <vscale x 8 x i8> @llvm.vp.srem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ %v = call <vscale x 8 x i8> @llvm.vp.srem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 5), <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i8> %v
}
@@ -1183,9 +1089,7 @@ define <vscale x 2 x i32> @vpsrem_by_const_nxv2i32(<vscale x 2 x i32> %va, <vsca
; CHECK-NEXT: vmul.vx v9, v9, a0, v0.t
; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 2 x i32> undef, i32 5, i32 0
- %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
- %v = call <vscale x 2 x i32> @llvm.vp.srem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ %v = call <vscale x 2 x i32> @llvm.vp.srem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> splat (i32 5), <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i32> %v
}
@@ -1210,9 +1114,7 @@ define <vscale x 8 x i8> @vpudiv_by_1_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8
; CHECK-NEXT: vmsgt.vi v0, v12, 1, v0.t
; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 8 x i8> undef, i8 1, i32 0
- %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
- %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 1), <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i8> %v
}
@@ -1237,9 +1139,7 @@ define <vscale x 4 x i16> @vpudiv_by_1_nxv4i16(<vscale x 4 x i16> %va, <vscale x
; CHECK-NEXT: vmsgt.vi v0, v12, 1, v0.t
; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 4 x i16> undef, i16 1, i32 0
- %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> splat (i16 1), <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i16> %v
}
@@ -1265,9 +1165,7 @@ define <vscale x 2 x i32> @vpudiv_by_1_nxv2i32(<vscale x 2 x i32> %va, <vscale x
; CHECK-NEXT: vmsgt.vi v0, v12, 1, v0.t
; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 2 x i32> undef, i32 1, i32 0
- %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
- %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> splat (i32 1), <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i32> %v
}
@@ -1294,9 +1192,7 @@ define <vscale x 1 x i64> @vpudiv_by_1_nxv1i64(<vscale x 1 x i64> %va, <vscale x
; CHECK-NEXT: vmsgt.vi v0, v12, 1, v0.t
; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> undef, i64 1, i32 0
- %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 1), <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
@@ -1321,9 +1217,7 @@ define <vscale x 8 x i8> @vpsdiv_by_1_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8
; CHECK-NEXT: vmsgt.vi v0, v12, 1, v0.t
; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 8 x i8> undef, i8 1, i32 0
- %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
- %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 1), <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i8> %v
}
@@ -1348,9 +1242,7 @@ define <vscale x 4 x i16> @vpsdiv_by_1_nxv4i16(<vscale x 4 x i16> %va, <vscale x
; CHECK-NEXT: vmsgt.vi v0, v12, 1, v0.t
; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 4 x i16> undef, i16 1, i32 0
- %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> splat (i16 1), <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i16> %v
}
@@ -1376,9 +1268,7 @@ define <vscale x 2 x i32> @vpsdiv_by_1_nxv2i32(<vscale x 2 x i32> %va, <vscale x
; CHECK-NEXT: vmsgt.vi v0, v12, 1, v0.t
; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 2 x i32> undef, i32 1, i32 0
- %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
- %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> splat (i32 1), <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i32> %v
}
@@ -1405,9 +1295,7 @@ define <vscale x 1 x i64> @vpsdiv_by_1_nxv1i64(<vscale x 1 x i64> %va, <vscale x
; CHECK-NEXT: vmsgt.vi v0, v12, 1, v0.t
; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> undef, i64 1, i32 0
- %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 1), <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
@@ -1418,9 +1306,7 @@ define <vscale x 8 x i8> @vpurem_by_1_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8
; CHECK-NEXT: vsrl.vi v9, v8, 0, v0.t
; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 8 x i8> undef, i8 1, i32 0
- %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
- %v = call <vscale x 8 x i8> @llvm.vp.urem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ %v = call <vscale x 8 x i8> @llvm.vp.urem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 1), <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i8> %v
}
@@ -1431,9 +1317,7 @@ define <vscale x 4 x i16> @vpurem_by_1_nxv4i16(<vscale x 4 x i16> %va, <vscale x
; CHECK-NEXT: vsrl.vi v9, v8, 0, v0.t
; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 4 x i16> undef, i16 1, i32 0
- %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x i16> @llvm.vp.urem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ %v = call <vscale x 4 x i16> @llvm.vp.urem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> splat (i16 1), <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i16> %v
}
@@ -1444,9 +1328,7 @@ define <vscale x 2 x i32> @vpurem_by_1_nxv2i32(<vscale x 2 x i32> %va, <vscale x
; CHECK-NEXT: vsrl.vi v9, v8, 0, v0.t
; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 2 x i32> undef, i32 1, i32 0
- %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
- %v = call <vscale x 2 x i32> @llvm.vp.urem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ %v = call <vscale x 2 x i32> @llvm.vp.urem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> splat (i32 1), <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i32> %v
}
@@ -1457,9 +1339,7 @@ define <vscale x 1 x i64> @vpurem_by_1_nxv1i64(<vscale x 1 x i64> %va, <vscale x
; CHECK-NEXT: vsrl.vi v9, v8, 0, v0.t
; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> undef, i64 1, i32 0
- %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %v = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 1), <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
@@ -1486,9 +1366,7 @@ define <vscale x 8 x i8> @vpsrem_by_1_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 8 x i8> undef, i8 1, i32 0
- %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
- %v = call <vscale x 8 x i8> @llvm.vp.srem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ %v = call <vscale x 8 x i8> @llvm.vp.srem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 1), <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i8> %v
}
@@ -1515,9 +1393,7 @@ define <vscale x 4 x i16> @vpsrem_by_1_nxv4i16(<vscale x 4 x i16> %va, <vscale x
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 4 x i16> undef, i16 1, i32 0
- %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x i16> @llvm.vp.srem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ %v = call <vscale x 4 x i16> @llvm.vp.srem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> splat (i16 1), <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i16> %v
}
@@ -1545,9 +1421,7 @@ define <vscale x 2 x i32> @vpsrem_by_1_nxv2i32(<vscale x 2 x i32> %va, <vscale x
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 2 x i32> undef, i32 1, i32 0
- %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
- %v = call <vscale x 2 x i32> @llvm.vp.srem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ %v = call <vscale x 2 x i32> @llvm.vp.srem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> splat (i32 1), <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i32> %v
}
@@ -1576,9 +1450,7 @@ define <vscale x 1 x i64> @vpsrem_by_1_nxv1i64(<vscale x 1 x i64> %va, <vscale x
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> undef, i64 1, i32 0
- %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %v = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 1), <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
@@ -1606,7 +1478,7 @@ define <vscale x 8 x i8> @vpsrem_by_neg1_nxv8i8(<vscale x 8 x i8> %va, <vscale x
; CHECK-NEXT: vrsub.vi v10, v10, 0, v0.t
; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 8 x i8> undef, i8 -1, i32 0
+ %vec = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0
%splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
%v = call <vscale x 8 x i8> @llvm.vp.srem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i8> %v
@@ -1636,7 +1508,7 @@ define <vscale x 4 x i16> @vpsrem_by_neg1_nxv4i16(<vscale x 4 x i16> %va, <vscal
; CHECK-NEXT: vrsub.vi v10, v10, 0, v0.t
; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 4 x i16> undef, i16 -1, i32 0
+ %vec = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0
%splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
%v = call <vscale x 4 x i16> @llvm.vp.srem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i16> %v
@@ -1667,7 +1539,7 @@ define <vscale x 2 x i32> @vpsrem_by_neg1_nxv2i32(<vscale x 2 x i32> %va, <vscal
; CHECK-NEXT: vrsub.vi v10, v10, 0, v0.t
; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 2 x i32> undef, i32 -1, i32 0
+ %vec = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0
%splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
%v = call <vscale x 2 x i32> @llvm.vp.srem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i32> %v
@@ -1699,7 +1571,7 @@ define <vscale x 1 x i64> @vpsrem_by_neg1_nxv1i64(<vscale x 1 x i64> %va, <vscal
; CHECK-NEXT: vrsub.vi v10, v10, 0, v0.t
; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> undef, i64 -1, i32 0
+ %vec = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0
%splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
%v = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
>From 12cc971715cbb3de9121a510356c57a8d06e8ec4 Mon Sep 17 00:00:00 2001
From: Jesse Huang <jesse.huang at sifive.com>
Date: Thu, 20 Feb 2025 05:34:01 -0800
Subject: [PATCH 09/10] !fixup didn't merge negative constants
---
.../CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll | 4 +-
llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll | 64 +++++--------------
2 files changed, 17 insertions(+), 51 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll
index d9bb93248c44e..2972df3e1cf7a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll
@@ -70,9 +70,7 @@ define <vscale x 1 x i64> @vpsdiv_const_sub_ashr_nxv1i64(<vscale x 1 x i64> %va,
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vdiv.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> poison, i64 -3, i32 0
- %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 -3), <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll
index 5ef604132a64c..c5159f7789d80 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll
@@ -406,9 +406,7 @@ define <vscale x 8 x i8> @vpsdiv_by_neg1_nxv8i8(<vscale x 8 x i8> %va, <vscale x
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT: vrsub.vi v8, v8, 0, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0
- %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
- %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 -1), <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i8> %v
}
@@ -418,9 +416,7 @@ define <vscale x 1 x i64> @vpsdiv_by_neg1_nxv1i64(<vscale x 1 x i64> %va, <vscal
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vrsub.vi v8, v8, 0, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0
- %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 -1), <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
@@ -430,9 +426,7 @@ define <vscale x 4 x i16> @vpsdiv_by_neg1_nxv4i16(<vscale x 4 x i16> %va, <vscal
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vrsub.vi v8, v8, 0, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0
- %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> splat (i16 -1), <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i16> %v
}
@@ -442,9 +436,7 @@ define <vscale x 2 x i32> @vpsdiv_by_neg1_nxv2i32(<vscale x 2 x i32> %va, <vscal
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vrsub.vi v8, v8, 0, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0
- %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
- %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> splat (i32 -1), <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i32> %v
}
@@ -457,9 +449,7 @@ define <vscale x 8 x i8> @vpsdiv_by_min_nxv8i8(<vscale x 8 x i8> %va, <vscale x
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 8 x i8> poison, i8 -128, i32 0
- %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
- %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 -128), <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i8> %v
}
@@ -473,9 +463,7 @@ define <vscale x 1 x i64> @vpsdiv_by_min_nxv1i64(<vscale x 1 x i64> %va, <vscale
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> poison, i64 -9223372036854775808, i32 0
- %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 -9223372036854775808), <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
@@ -488,9 +476,7 @@ define <vscale x 4 x i16> @vpsdiv_by_min_nxv4i16(<vscale x 4 x i16> %va, <vscale
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 4 x i16> poison, i16 -32768, i32 0
- %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> splat (i16 -32768), <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i16> %v
}
@@ -503,9 +489,7 @@ define <vscale x 2 x i32> @vpsdiv_by_min_nxv2i32(<vscale x 2 x i32> %va, <vscale
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 2 x i32> poison, i32 -2147483648, i32 0
- %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
- %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> splat (i32 -2147483648), <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i32> %v
}
@@ -821,9 +805,7 @@ define <vscale x 8 x i8> @vpsdiv_const_sub_ashr_nxv8i8(<vscale x 8 x i8> %va, <v
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 8 x i8> poison, i8 -7, i32 0
- %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
- %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 -7), <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i8> %v
}
@@ -840,9 +822,7 @@ define <vscale x 4 x i16> @vpsdiv_const_sub_ashr_nxv4i16(<vscale x 4 x i16> %va,
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 4 x i16> poison, i16 -15, i32 0
- %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> splat (i16 -15), <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i16> %v
}
@@ -859,9 +839,7 @@ define <vscale x 2 x i32> @vpsdiv_const_sub_ashr_nxv2i32(<vscale x 2 x i32> %va,
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 2 x i32> poison, i32 -7, i32 0
- %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
- %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> splat (i32 -7), <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i32> %v
}
@@ -881,9 +859,7 @@ define <vscale x 1 x i64> @vpsdiv_const_sub_ashr_nxv1i64(<vscale x 1 x i64> %va,
; CHECK-NEXT: vand.vi v9, v9, -1, v0.t
; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> poison, i64 -3, i32 0
- %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 -3), <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
@@ -1478,9 +1454,7 @@ define <vscale x 8 x i8> @vpsrem_by_neg1_nxv8i8(<vscale x 8 x i8> %va, <vscale x
; CHECK-NEXT: vrsub.vi v10, v10, 0, v0.t
; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0
- %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
- %v = call <vscale x 8 x i8> @llvm.vp.srem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+ %v = call <vscale x 8 x i8> @llvm.vp.srem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 -1), <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i8> %v
}
@@ -1508,9 +1482,7 @@ define <vscale x 4 x i16> @vpsrem_by_neg1_nxv4i16(<vscale x 4 x i16> %va, <vscal
; CHECK-NEXT: vrsub.vi v10, v10, 0, v0.t
; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0
- %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x i16> @llvm.vp.srem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+ %v = call <vscale x 4 x i16> @llvm.vp.srem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> splat (i16 -1), <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i16> %v
}
@@ -1539,9 +1511,7 @@ define <vscale x 2 x i32> @vpsrem_by_neg1_nxv2i32(<vscale x 2 x i32> %va, <vscal
; CHECK-NEXT: vrsub.vi v10, v10, 0, v0.t
; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0
- %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
- %v = call <vscale x 2 x i32> @llvm.vp.srem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+ %v = call <vscale x 2 x i32> @llvm.vp.srem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> splat (i32 -1), <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i32> %v
}
@@ -1571,9 +1541,7 @@ define <vscale x 1 x i64> @vpsrem_by_neg1_nxv1i64(<vscale x 1 x i64> %va, <vscal
; CHECK-NEXT: vrsub.vi v10, v10, 0, v0.t
; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT: ret
- %vec = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0
- %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
- %v = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+ %v = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 -1), <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
>From 69c6cbce5a06a1d730e5b2afc9e1ed81abd61b42 Mon Sep 17 00:00:00 2001
From: Jesse Huang <jesse.huang at sifive.com>
Date: Tue, 25 Feb 2025 00:54:41 -0800
Subject: [PATCH 10/10] Merge BuildVPU(S)DIV into BuildU(S)DIV
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 58 +++++--------------
1 file changed, 16 insertions(+), 42 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5cd17a203dbf2..bee77824c1d8b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -566,10 +566,8 @@ namespace {
SDValue visitVPUDIV(SDNode *N);
SDValue visitVPUDIVLike(SDValue N0, SDValue N1, SDNode *N);
- SDValue BuildVPUDIV(SDNode *N);
SDValue visitVPSDIV(SDNode *N);
SDValue visitVPSDIVLike(SDValue N0, SDValue N1, SDNode *N);
- SDValue BuildVPSDIV(SDNode *N);
SDValue visitVPREM(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
@@ -27280,42 +27278,6 @@ SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::BuildVPUDIV(SDNode *N) {
- // when optimising for minimum size, we don't want to expand a div to a mul
- // and a shift.
- if (DAG.getMachineFunction().getFunction().hasMinSize())
- return SDValue();
-
- SmallVector<SDNode *, 8> Built;
- if (SDValue S = TLI.BuildVPUDIV(N, DAG, LegalOperations, Built)) {
- for (SDNode *N : Built)
- AddToWorklist(N);
- return S;
- }
-
- return SDValue();
-}
-
-/// Given an ISD::VP_SDIV node expressing a divide by constant, return
-/// a DAG expression to select that will generate the same value by multiplying
-/// by a magic number.
-/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
-SDValue DAGCombiner::BuildVPSDIV(SDNode *N) {
- // when optimising for minimum size, we don't want to expand a div to a mul
- // and a shift.
- if (DAG.getMachineFunction().getFunction().hasMinSize())
- return SDValue();
-
- SmallVector<SDNode *, 8> Built;
- if (SDValue S = TLI.BuildVPSDIV(N, DAG, LegalOperations, Built)) {
- for (SDNode *N : Built)
- AddToWorklist(N);
- return S;
- }
-
- return SDValue();
-}
-
SDValue DAGCombiner::visitVPUDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
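[Reviewer note, not part of the patch: the comment deleted above cites "Hacker's Delight"; for reference, the signed magic-number scheme it refers to looks like this scalar C++ sketch for d = 5 at 32 bits (arithmetic right shift of negatives assumed, as on mainstream targets):
  #include <cassert>
  #include <cstdint>
  int32_t sdiv5(int32_t x) {
    // q = mulhs(x, 0x66666667) >> 1, then add 1 if q is negative
    // so the result truncates toward zero.
    int32_t q = (int32_t)(((int64_t)x * 0x66666667LL) >> 32) >> 1;
    return q + ((uint32_t)q >> 31);
  }
  int main() {
    for (int32_t x : {-11, -10, -7, -1, 0, 1, 4, 5, 9, 10})
      assert(sdiv5(x) == x / 5);
  }
]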
@@ -27417,7 +27379,7 @@ SDValue DAGCombiner::visitVPUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (isConstantOrConstantVector(N1) &&
!TLI.isIntDivCheap(N->getValueType(0), Attr)) {
- if (SDValue Op = BuildVPUDIV(N))
+ if (SDValue Op = BuildUDIV(N))
return Op;
}
return SDValue();
@@ -27537,7 +27499,7 @@ SDValue DAGCombiner::visitVPSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (isConstantOrConstantVector(N1) &&
!TLI.isIntDivCheap(N->getValueType(0), Attr))
- if (SDValue Op = BuildVPSDIV(N))
+ if (SDValue Op = BuildSDIV(N))
return Op;
return SDValue();
@@ -28640,7 +28602,13 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) {
return SDValue();
SmallVector<SDNode *, 8> Built;
- if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
+ SDValue S;
+ if (N->isVPOpcode())
+ S = TLI.BuildVPSDIV(N, DAG, LegalOperations, Built);
+ else
+ S = TLI.BuildSDIV(N, DAG, LegalOperations, LegalTypes, Built);
+
+ if (S) {
for (SDNode *N : Built)
AddToWorklist(N);
return S;
@@ -28681,7 +28649,13 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
return SDValue();
SmallVector<SDNode *, 8> Built;
- if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
+ SDValue S;
+ if (N->isVPOpcode())
+ S = TLI.BuildVPUDIV(N, DAG, LegalOperations, Built);
+ else
+ S = TLI.BuildUDIV(N, DAG, LegalOperations, LegalTypes, Built);
+
+ if (S) {
for (SDNode *N : Built)
AddToWorklist(N);
return S;
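[Reviewer note, not part of the patch: with BuildUDIV now dispatching to TLI.BuildVPUDIV for VP opcodes, the vpurem_by_const_nxv8i8 test above materializes the i8 magic with "li a0, -51", i.e. 0xCD = ceil(2^10 / 5). That constant is exhaustively correct for i8, as this C++ check shows:
  #include <cassert>
  #include <cstdint>
  int main() {
    // q = (x * 0xCD) >> 8 >> 2 equals x / 5 for every uint8_t value.
    for (unsigned x = 0; x < 256; ++x)
      assert(((x * 0xCDu) >> 8 >> 2) == x / 5);
  }
]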