[llvm] [DAGCombiner][VP] Add DAGCombine for VP_MUL (PR #80105)
Jianjian Guan via llvm-commits
llvm-commits at lists.llvm.org
Thu May 30 01:11:57 PDT 2024
https://github.com/jacquesguan updated https://github.com/llvm/llvm-project/pull/80105
From 8bf0ab91b663550c741d8d9cdfd463c662dc59aa Mon Sep 17 00:00:00 2001
From: Jianjian GUAN <jacquesguan at me.com>
Date: Fri, 9 Jun 2023 10:19:49 +0800
Subject: [PATCH] [DAGCombiner][VP] Add DAGCombine for VP_MUL.
Use visitMUL to combine VP_MUL, sharing most of the MUL combine logic with VP_MUL.
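As an illustration (based on the new tests below; the value names here are
made up), a VP multiply by a power-of-two splat now folds to a VP shift,
mirroring the existing (mul x, (1 << c)) -> (shl x, c) fold:

  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %x, <8 x i64> splat (i64 64), <8 x i1> %m, i32 %evl)
  ; combines to the equivalent of
  %v = call <8 x i64> @llvm.vp.shl.v8i64(<8 x i64> %x, <8 x i64> splat (i64 6), <8 x i1> %m, i32 %evl)

The vmul_vx_pow2 tests below check that this lowers to a single vsll.vi on
RISC-V.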
Differential Revision: https://reviews.llvm.org/D121187
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 107 ++++---
.../RISCV/rvv/fixed-vectors-vmul-vp.ll | 282 ++++++++++++++++++
llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll | 257 ++++++++++++++++
3 files changed, 602 insertions(+), 44 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 42e861e61201c2..0d70ef269cb8b4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -438,7 +438,7 @@ namespace {
SDValue visitSUBE(SDNode *N);
SDValue visitUSUBO_CARRY(SDNode *N);
SDValue visitSSUBO_CARRY(SDNode *N);
- SDValue visitMUL(SDNode *N);
+ template <class MatchContextClass> SDValue visitMUL(SDNode *N);
SDValue visitMULFIX(SDNode *N);
SDValue useDivRem(SDNode *N);
SDValue visitSDIV(SDNode *N);
@@ -1855,7 +1855,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
case ISD::UMULFIXSAT: return visitMULFIX(N);
- case ISD::MUL: return visitMUL(N);
+ case ISD::MUL: return visitMUL<EmptyMatchContext>(N);
case ISD::SDIV: return visitSDIV(N);
case ISD::UDIV: return visitUDIV(N);
case ISD::SREM:
@@ -4331,11 +4331,13 @@ SDValue DAGCombiner::visitMULFIX(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitMUL(SDNode *N) {
+template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
SDLoc DL(N);
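+  // UseVP is true when combining a VP_MUL through VPMatchContext; it gates
+  // off folds that do not yet work on VP nodes.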
+ bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
+ MatchContextClass Matcher(DAG, TLI, N);
// fold (mul x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
@@ -4348,7 +4350,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// canonicalize constant to RHS (vector doesn't have to splat)
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(ISD::MUL, DL, VT, N1, N0);
+ return Matcher.getNode(ISD::MUL, DL, VT, N1, N0);
bool N1IsConst = false;
bool N1IsOpaqueConst = false;
@@ -4356,8 +4358,10 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
- return FoldedVOp;
+ // TODO: Change this to use SimplifyVBinOp when it supports VP ops.
+ if (!UseVP)
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
assert((!N1IsConst ||
@@ -4379,12 +4383,13 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
if (N1IsConst && ConstValue1.isOne())
return N0;
- if (SDValue NewSel = foldBinOpIntoSelect(N))
- return NewSel;
+ if (!UseVP)
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
// fold (mul x, -1) -> 0-x
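+  // Build 0-x explicitly; in the VP context the matcher maps ISD::SUB to
+  // VP_SUB.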
if (N1IsConst && ConstValue1.isAllOnes())
- return DAG.getNegative(N0, DL, VT);
+ return Matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
// fold (mul x, (1 << c)) -> x << c
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
@@ -4392,7 +4397,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
EVT ShiftVT = getShiftAmountTy(N0.getValueType());
SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
- return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
+ return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc);
}
}
@@ -4403,24 +4408,26 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// FIXME: If the input is something that is easily negated (e.g. a
// single-use add), we should put the negate there.
- return DAG.getNode(ISD::SUB, DL, VT,
- DAG.getConstant(0, DL, VT),
- DAG.getNode(ISD::SHL, DL, VT, N0,
- DAG.getConstant(Log2Val, DL, ShiftVT)));
+ return Matcher.getNode(
+ ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ Matcher.getNode(ISD::SHL, DL, VT, N0,
+ DAG.getConstant(Log2Val, DL, ShiftVT)));
}
// Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
// hi result is in use in case we hit this mid-legalization.
- for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
- if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
- SDVTList LoHiVT = DAG.getVTList(VT, VT);
- // TODO: Can we match commutable operands with getNodeIfExists?
- if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
- if (LoHi->hasAnyUseOfValue(1))
- return SDValue(LoHi, 0);
- if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
- if (LoHi->hasAnyUseOfValue(1))
- return SDValue(LoHi, 0);
+ if (!UseVP) {
+ for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
+ if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
+ SDVTList LoHiVT = DAG.getVTList(VT, VT);
+ // TODO: Can we match commutable operands with getNodeIfExists?
+ if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
+ if (LoHi->hasAnyUseOfValue(1))
+ return SDValue(LoHi, 0);
+ if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
+ if (LoHi->hasAnyUseOfValue(1))
+ return SDValue(LoHi, 0);
+ }
}
}
@@ -4439,7 +4446,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// x * 0xf800 --> (x << 16) - (x << 11)
// x * -0x8800 --> -((x << 15) + (x << 11))
// x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
- if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
+ if (!UseVP && N1IsConst &&
+ TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
// TODO: We could handle more general decomposition of any constant by
// having the target set a limit on number of ops and making a
// callback to determine that sequence (similar to sqrt expansion).
@@ -4473,7 +4481,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
}
// (mul (shl X, c1), c2) -> (mul X, c2 << c1)
- if (N0.getOpcode() == ISD::SHL) {
+ if (sd_context_match(N0, Matcher, m_Opc(ISD::SHL))) {
SDValue N01 = N0.getOperand(1);
if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
-      return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
+      return Matcher.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
@@ -4485,34 +4493,33 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue Sh, Y;
// Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
- if (N0.getOpcode() == ISD::SHL &&
- isConstantOrConstantVector(N0.getOperand(1)) && N0->hasOneUse()) {
+ if (sd_context_match(N0, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
+ isConstantOrConstantVector(N0.getOperand(1))) {
Sh = N0; Y = N1;
- } else if (N1.getOpcode() == ISD::SHL &&
- isConstantOrConstantVector(N1.getOperand(1)) &&
- N1->hasOneUse()) {
+ } else if (sd_context_match(N1, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
+ isConstantOrConstantVector(N1.getOperand(1))) {
Sh = N1; Y = N0;
}
if (Sh.getNode()) {
- SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
- return DAG.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
+ SDValue Mul = Matcher.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
+ return Matcher.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
}
}
// fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
- if (N0.getOpcode() == ISD::ADD &&
+ if (sd_context_match(N0, Matcher, m_Opc(ISD::ADD)) &&
DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
isMulAddWithConstProfitable(N, N0, N1))
- return DAG.getNode(
+ return Matcher.getNode(
ISD::ADD, DL, VT,
- DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
- DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
+ Matcher.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
+ Matcher.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
// Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
ConstantSDNode *NC1 = isConstOrConstSplat(N1);
- if (N0.getOpcode() == ISD::VSCALE && NC1) {
+ if (!UseVP && N0.getOpcode() == ISD::VSCALE && NC1) {
const APInt &C0 = N0.getConstantOperandAPInt(0);
const APInt &C1 = NC1->getAPIntValue();
return DAG.getVScale(DL, VT, C0 * C1);
@@ -4520,7 +4527,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
APInt MulVal;
- if (N0.getOpcode() == ISD::STEP_VECTOR &&
+ if (!UseVP && N0.getOpcode() == ISD::STEP_VECTOR &&
ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
const APInt &C0 = N0.getConstantOperandAPInt(0);
APInt NewStep = C0 * MulVal;
@@ -4558,13 +4565,17 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
}
// reassociate mul
- if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
- return RMUL;
+ // TODO: Change reassociateOps to support vp ops.
+ if (!UseVP)
+ if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
+ return RMUL;
// Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
- if (SDValue SD =
- reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
- return SD;
+ // TODO: Change reassociateReduction to support vp ops.
+ if (!UseVP)
+ if (SDValue SD =
+ reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
+ return SD;
// Simplify the operands using demanded-bits information.
if (SimplifyDemandedBits(SDValue(N, 0)))
@@ -26693,6 +26704,10 @@ SDValue DAGCombiner::visitVPOp(SDNode *N) {
return visitFMA<VPMatchContext>(N);
case ISD::VP_SELECT:
return visitVP_SELECT(N);
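+  // Reuse the MUL combines through the VP match context.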
+ case ISD::VP_MUL:
+ return visitMUL<VPMatchContext>(N);
+ default:
+ break;
}
return SDValue();
}
@@ -27850,6 +27865,10 @@ static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
if (!VT.isVector())
return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
// We need to create a build vector
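+  // unless the operand is a splat, in which case we can splat the log2
+  // constant directly.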
+ if (Op.getOpcode() == ISD::SPLAT_VECTOR)
+ return DAG.getSplat(VT, DL,
+ DAG.getConstant(Pow2Constants.back().logBase2(), DL,
+ VT.getScalarType()));
SmallVector<SDValue> Log2Ops;
for (const APInt &Pow2 : Pow2Constants)
Log2Ops.emplace_back(
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
index 2525ac03ea9a87..8970fbf740d235 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
@@ -921,3 +921,285 @@ define <16 x i64> @vmul_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %
%v = call <16 x i64> @llvm.vp.mul.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> splat (i1 true), i32 %evl)
ret <16 x i64> %v
}
+
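+; The following tests exercise the new VP_MUL combines: multiply by an undef,
+; zero, one, negative-one, and (negative) power-of-two splat, plus folds of
+; vp.mul through vp.shl and vp.add operands.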
+define <8 x i64> @vmul_vv_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vmul_vv_undef_v8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmul_vv_undef_v8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: ret
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> undef, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vmul_vx_undef_v8i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmul_vx_undef_v8i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> undef, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_zero_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vmul_vx_zero_v8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmul_vx_zero_v8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 0, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_zero_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vmul_vx_zero_v8i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmul_vx_zero_v8i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 0, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_one_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_one_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 1, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_one_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_one_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 1, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_negone_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_negone_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vrsub.vi v8, v8, 0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_negone_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_negone_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vrsub.vi v8, v8, 0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_pow2_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_pow2_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsll.vi v8, v8, 6, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 64, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_pow2_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_pow2_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsll.vi v8, v8, 6
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 64, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_negpow2_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_negpow2_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsll.vi v8, v8, 6, v0.t
+; CHECK-NEXT: vrsub.vi v8, v8, 0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 -64, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_negpow2_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_negpow2_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsll.vi v8, v8, 6
+; CHECK-NEXT: vrsub.vi v8, v8, 0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 -64, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+declare <8 x i64> @llvm.vp.shl.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32)
+
+define <8 x i64> @vmul_vshl_vx_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vshl_vx_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsll.vi v8, v8, 3, v0.t
+; CHECK-NEXT: li a0, 7
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head1 = insertelement <8 x i64> poison, i64 3, i32 0
+ %vb = shufflevector <8 x i64> %elt.head1, <8 x i64> poison, <8 x i32> zeroinitializer
+ %elt.head2 = insertelement <8 x i64> poison, i64 7, i32 0
+ %vc = shufflevector <8 x i64> %elt.head2, <8 x i64> poison, <8 x i32> zeroinitializer
+ %vshl = call <8 x i64> @llvm.vp.shl.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %vshl, <8 x i64> %vc, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vshl_vx_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vshl_vx_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 56
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %elt.head1 = insertelement <8 x i64> poison, i64 3, i32 0
+ %vb = shufflevector <8 x i64> %elt.head1, <8 x i64> poison, <8 x i32> zeroinitializer
+ %elt.head2 = insertelement <8 x i64> poison, i64 7, i32 0
+ %vc = shufflevector <8 x i64> %elt.head2, <8 x i64> poison, <8 x i32> zeroinitializer
+ %vshl = call <8 x i64> @llvm.vp.shl.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %vshl, <8 x i64> %vc, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vshl_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vshl_vv_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsll.vi v8, v8, 7, v0.t
+; CHECK-NEXT: vmul.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 7, i32 0
+ %vc = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %vshl = call <8 x i64> @llvm.vp.shl.v8i64(<8 x i64> %va, <8 x i64> %vc, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %vshl, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vshl_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %vb, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vshl_vv_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vmul.vv v8, v8, v12
+; CHECK-NEXT: vsll.vi v8, v8, 7
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %elt.head = insertelement <8 x i64> poison, i64 7, i32 0
+ %vc = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %vshl = call <8 x i64> @llvm.vp.shl.v8i64(<8 x i64> %va, <8 x i64> %vc, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %vshl, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+declare <8 x i64> @llvm.vp.add.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32)
+
+define <8 x i64> @vmul_vadd_vx_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vadd_vx_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vadd.vi v8, v8, 3, v0.t
+; CHECK-NEXT: li a0, 7
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head1 = insertelement <8 x i64> poison, i64 3, i32 0
+ %vb = shufflevector <8 x i64> %elt.head1, <8 x i64> poison, <8 x i32> zeroinitializer
+ %elt.head2 = insertelement <8 x i64> poison, i64 7, i32 0
+ %vc = shufflevector <8 x i64> %elt.head2, <8 x i64> poison, <8 x i32> zeroinitializer
+ %vadd = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %vadd, <8 x i64> %vc, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vadd_vx_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vadd_vx_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 21
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: li a1, 7
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vmadd.vx v8, a1, v12
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %elt.head1 = insertelement <8 x i64> poison, i64 3, i32 0
+ %vb = shufflevector <8 x i64> %elt.head1, <8 x i64> poison, <8 x i32> zeroinitializer
+ %elt.head2 = insertelement <8 x i64> poison, i64 7, i32 0
+ %vc = shufflevector <8 x i64> %elt.head2, <8 x i64> poison, <8 x i32> zeroinitializer
+ %vadd = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %vadd, <8 x i64> %vc, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll
index 24d1768da1027f..51026cbcb8c4bf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll
@@ -1199,3 +1199,260 @@ define <vscale x 8 x i64> @vmul_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64
%v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
ret <vscale x 8 x i64> %v
}
+
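+; Same VP_MUL combine coverage as the fixed-vector tests above, applied to
+; scalable vectors.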
+define <vscale x 8 x i64> @vmul_vv_undef_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vv_undef_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> undef, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vx_undef_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_undef_nxv8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> undef, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vx_zero_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_zero_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> poison, i64 0, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vx_zero_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_zero_nxv8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> poison, i64 0, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vx_one_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_one_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> poison, i64 1, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vx_one_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_one_nxv8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> poison, i64 1, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vx_negone_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_negone_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vrsub.vi v8, v8, 0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vx_negone_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_negone_nxv8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vrsub.vi v8, v8, 0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vx_pow2_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_pow2_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsll.vi v8, v8, 6, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> poison, i64 64, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vx_pow2_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_pow2_nxv8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsll.vi v8, v8, 6
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> poison, i64 64, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vx_negpow2_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_negpow2_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsll.vi v8, v8, 6, v0.t
+; CHECK-NEXT: vrsub.vi v8, v8, 0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> poison, i64 -64, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vx_negpow2_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_negpow2_nxv8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsll.vi v8, v8, 6
+; CHECK-NEXT: vrsub.vi v8, v8, 0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> poison, i64 -64, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+declare <vscale x 8 x i64> @llvm.vp.shl.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i64> @vmul_vshl_vx_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vshl_vx_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 56
+; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head1 = insertelement <vscale x 8 x i64> poison, i64 3, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %elt.head2 = insertelement <vscale x 8 x i64> poison, i64 7, i32 0
+ %vc = shufflevector <vscale x 8 x i64> %elt.head2, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %vshl = call <vscale x 8 x i64> @llvm.vp.shl.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %vshl, <vscale x 8 x i64> %vc, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vshl_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vshl_vx_nxv8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 56
+; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %elt.head1 = insertelement <vscale x 8 x i64> poison, i64 3, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %elt.head2 = insertelement <vscale x 8 x i64> poison, i64 7, i32 0
+ %vc = shufflevector <vscale x 8 x i64> %elt.head2, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %vshl = call <vscale x 8 x i64> @llvm.vp.shl.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %vshl, <vscale x 8 x i64> %vc, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vshl_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vshl_vv_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vmul.vv v8, v8, v16, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 7, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> poison, i64 7, i32 0
+ %vc = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %vshl = call <vscale x 8 x i64> @llvm.vp.shl.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vc, <vscale x 8 x i1> %m, i32 %evl)
+ %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %vshl, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vshl_vv_nxv8i64_unmasked(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vshl_vv_nxv8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vmul.vv v8, v8, v16
+; CHECK-NEXT: vsll.vi v8, v8, 7
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %elt.head = insertelement <vscale x 8 x i64> poison, i64 7, i32 0
+ %vc = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %vshl = call <vscale x 8 x i64> @llvm.vp.shl.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vc, <vscale x 8 x i1> %m, i32 %evl)
+ %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %vshl, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+declare <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i64> @vmul_vadd_vx_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vadd_vx_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 7
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vmul.vx v8, v8, a1, v0.t
+; CHECK-NEXT: li a0, 21
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head1 = insertelement <vscale x 8 x i64> poison, i64 3, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %elt.head2 = insertelement <vscale x 8 x i64> poison, i64 7, i32 0
+ %vc = shufflevector <vscale x 8 x i64> %elt.head2, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %vadd = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %vadd, <vscale x 8 x i64> %vc, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vadd_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vadd_vx_nxv8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 21
+; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v16, a1
+; CHECK-NEXT: li a1, 7
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vmadd.vx v8, a1, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %elt.head1 = insertelement <vscale x 8 x i64> poison, i64 3, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %elt.head2 = insertelement <vscale x 8 x i64> poison, i64 7, i32 0
+ %vc = shufflevector <vscale x 8 x i64> %elt.head2, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %vadd = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %vadd, <vscale x 8 x i64> %vc, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}