[llvm] [DAGCombiner][VP] Add DAGCombine for VP_MUL (PR #80105)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 30 22:26:32 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-llvm-selectiondag
Author: Jianjian Guan (jacquesguan)
Changes:
Use visitMUL to combine VP_MUL, sharing most of the MUL combining logic between MUL and VP_MUL.
Migrated from https://reviews.llvm.org/D121187
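For context, the sharing works by routing node matching and creation through a "match context", so the same visitMUL body can serve both plain ISD::MUL and ISD::VP_MUL. Below is a rough sketch of the shape of the two contexts. The class names (EmptyMatchContext, VPMatchContext) and the ISD helpers (getVPForBaseOpcode, getVPMaskIdx, getVPExplicitVectorLengthIdx) are real, but the bodies here are simplified illustrations, not the actual DAGCombiner.cpp implementation:

```cpp
// Illustrative only: simplified shape of the match contexts that
// visitMUL<MatchContextClass> is templated over.
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include <optional>

namespace {
using namespace llvm;

// Plain context: matches and builds nodes with the base opcode as-is,
// so visitMUL<EmptyMatchContext> behaves exactly like the old visitMUL.
struct EmptyMatchContext {
  SelectionDAG &DAG;
  EmptyMatchContext(SelectionDAG &DAG, const TargetLowering &, SDNode *)
      : DAG(DAG) {}

  bool match(SDValue N, unsigned Opcode) const {
    return N->getOpcode() == Opcode;
  }
  template <typename... ArgT> SDValue getNode(ArgT &&...Args) {
    return DAG.getNode(std::forward<ArgT>(Args)...);
  }
};

// VP context: built from the root VP node; translates a base opcode
// (e.g. ISD::MUL) into its VP counterpart (ISD::VP_MUL) and threads the
// root's mask and EVL through every node it matches or creates.
struct VPMatchContext {
  SelectionDAG &DAG;
  SDValue Mask, EVL;
  VPMatchContext(SelectionDAG &DAG, const TargetLowering &, SDNode *Root)
      : DAG(DAG) {
    unsigned RootOpc = Root->getOpcode();
    Mask = Root->getOperand(*ISD::getVPMaskIdx(RootOpc));
    EVL = Root->getOperand(*ISD::getVPExplicitVectorLengthIdx(RootOpc));
  }

  bool match(SDValue N, unsigned BaseOpc) const {
    // Only treat N as a match if it is the VP form of BaseOpc and its
    // mask/EVL agree with the root's, so folds stay semantically safe.
    std::optional<unsigned> VPOpc = ISD::getVPForBaseOpcode(BaseOpc);
    return VPOpc && N->getOpcode() == *VPOpc &&
           N->getOperand(*ISD::getVPMaskIdx(*VPOpc)) == Mask &&
           N->getOperand(*ISD::getVPExplicitVectorLengthIdx(*VPOpc)) == EVL;
  }
  SDValue getNode(unsigned BaseOpc, const SDLoc &DL, EVT VT, SDValue Op0,
                  SDValue Op1) {
    // e.g. getNode(ISD::SUB, ...) emits ISD::VP_SUB with the root's
    // mask and EVL appended as trailing operands.
    return DAG.getNode(*ISD::getVPForBaseOpcode(BaseOpc), DL, VT,
                       {Op0, Op1, Mask, EVL});
  }
};
} // namespace
```

With that shape, a fold like `mul x, -1 -> sub 0, x` only needs `matcher.getNode(ISD::SUB, ...)` to produce either ISD::SUB or ISD::VP_SUB depending on the instantiation, which is why the diff below swaps `DAG.getNode` for `matcher.getNode` on the shared paths and guards the paths VP ops can't take with `!IsVP`.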
---
Patch is 38.22 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/80105.diff
3 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+62-42)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll (+282)
- (modified) llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll (+257)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b17724cd07209..6268b6386fe4f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -439,7 +439,7 @@ namespace {
SDValue visitSUBE(SDNode *N);
SDValue visitUSUBO_CARRY(SDNode *N);
SDValue visitSSUBO_CARRY(SDNode *N);
- SDValue visitMUL(SDNode *N);
+ template <class MatchContextClass> SDValue visitMUL(SDNode *N);
SDValue visitMULFIX(SDNode *N);
SDValue useDivRem(SDNode *N);
SDValue visitSDIV(SDNode *N);
@@ -1948,7 +1948,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
case ISD::UMULFIXSAT: return visitMULFIX(N);
- case ISD::MUL: return visitMUL(N);
+ case ISD::MUL:
+ return visitMUL<EmptyMatchContext>(N);
case ISD::SDIV: return visitSDIV(N);
case ISD::UDIV: return visitUDIV(N);
case ISD::SREM:
@@ -4356,11 +4357,13 @@ SDValue DAGCombiner::visitMULFIX(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitMUL(SDNode *N) {
+template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
SDLoc DL(N);
+ bool IsVP = ISD::isVPOpcode(N->getOpcode());
+ MatchContextClass matcher(DAG, TLI, N);
// fold (mul x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
@@ -4373,7 +4376,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// canonicalize constant to RHS (vector doesn't have to splat)
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(ISD::MUL, DL, VT, N1, N0);
+ return matcher.getNode(ISD::MUL, DL, VT, N1, N0);
bool N1IsConst = false;
bool N1IsOpaqueConst = false;
@@ -4381,8 +4384,10 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
- return FoldedVOp;
+ // TODO: Change this to use SimplifyVBinOp when it supports VP op.
+ if (!IsVP)
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
assert((!N1IsConst ||
@@ -4404,12 +4409,13 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
if (N1IsConst && ConstValue1.isOne())
return N0;
- if (SDValue NewSel = foldBinOpIntoSelect(N))
- return NewSel;
+ if (!IsVP)
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
// fold (mul x, -1) -> 0-x
if (N1IsConst && ConstValue1.isAllOnes())
- return DAG.getNegative(N0, DL, VT);
+ return matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
// fold (mul x, (1 << c)) -> x << c
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
@@ -4417,7 +4423,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
EVT ShiftVT = getShiftAmountTy(N0.getValueType());
SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
- return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
+ return matcher.getNode(ISD::SHL, DL, VT, N0, Trunc);
}
}
@@ -4428,26 +4434,27 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// FIXME: If the input is something that is easily negated (e.g. a
// single-use add), we should put the negate there.
- return DAG.getNode(ISD::SUB, DL, VT,
- DAG.getConstant(0, DL, VT),
- DAG.getNode(ISD::SHL, DL, VT, N0,
- DAG.getConstant(Log2Val, DL, ShiftVT)));
+ return matcher.getNode(
+ ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ matcher.getNode(ISD::SHL, DL, VT, N0,
+ DAG.getConstant(Log2Val, DL, ShiftVT)));
}
// Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
// hi result is in use in case we hit this mid-legalization.
- for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
- if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
- SDVTList LoHiVT = DAG.getVTList(VT, VT);
- // TODO: Can we match commutable operands with getNodeIfExists?
- if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
- if (LoHi->hasAnyUseOfValue(1))
- return SDValue(LoHi, 0);
- if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
- if (LoHi->hasAnyUseOfValue(1))
- return SDValue(LoHi, 0);
+ if (!IsVP)
+ for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
+ if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
+ SDVTList LoHiVT = DAG.getVTList(VT, VT);
+ // TODO: Can we match commutable operands with getNodeIfExists?
+ if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
+ if (LoHi->hasAnyUseOfValue(1))
+ return SDValue(LoHi, 0);
+ if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
+ if (LoHi->hasAnyUseOfValue(1))
+ return SDValue(LoHi, 0);
+ }
}
- }
// Try to transform:
// (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
@@ -4464,7 +4471,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// x * 0xf800 --> (x << 16) - (x << 11)
// x * -0x8800 --> -((x << 15) + (x << 11))
// x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
- if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
+ if (!IsVP && N1IsConst &&
+ TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
// TODO: We could handle more general decomposition of any constant by
// having the target set a limit on number of ops and making a
// callback to determine that sequence (similar to sqrt expansion).
@@ -4498,7 +4506,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
}
// (mul (shl X, c1), c2) -> (mul X, c2 << c1)
- if (N0.getOpcode() == ISD::SHL) {
+ if (matcher.match(N0, ISD::SHL)) {
SDValue N01 = N0.getOperand(1);
if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
@@ -4510,34 +4518,34 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue Sh, Y;
// Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
- if (N0.getOpcode() == ISD::SHL &&
+ if (matcher.match(N0, ISD::SHL) &&
isConstantOrConstantVector(N0.getOperand(1)) && N0->hasOneUse()) {
Sh = N0; Y = N1;
- } else if (N1.getOpcode() == ISD::SHL &&
+ } else if (matcher.match(N1, ISD::SHL) &&
isConstantOrConstantVector(N1.getOperand(1)) &&
N1->hasOneUse()) {
Sh = N1; Y = N0;
}
if (Sh.getNode()) {
- SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
- return DAG.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
+ SDValue Mul = matcher.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
+ return matcher.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
}
}
// fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
- if (N0.getOpcode() == ISD::ADD &&
+ if (matcher.match(N0, ISD::ADD) &&
DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
isMulAddWithConstProfitable(N, N0, N1))
- return DAG.getNode(
+ return matcher.getNode(
ISD::ADD, DL, VT,
- DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
- DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
+ matcher.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
+ matcher.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
// Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
ConstantSDNode *NC1 = isConstOrConstSplat(N1);
- if (N0.getOpcode() == ISD::VSCALE && NC1) {
+ if (!IsVP && N0.getOpcode() == ISD::VSCALE && NC1) {
const APInt &C0 = N0.getConstantOperandAPInt(0);
const APInt &C1 = NC1->getAPIntValue();
return DAG.getVScale(DL, VT, C0 * C1);
@@ -4545,7 +4553,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
APInt MulVal;
- if (N0.getOpcode() == ISD::STEP_VECTOR &&
+ if (!IsVP && N0.getOpcode() == ISD::STEP_VECTOR &&
ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
const APInt &C0 = N0.getConstantOperandAPInt(0);
APInt NewStep = C0 * MulVal;
@@ -4583,13 +4591,17 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
}
// reassociate mul
- if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
- return RMUL;
+ // TODO: Change reassociateOps to support vp ops.
+ if (!IsVP)
+ if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
+ return RMUL;
// Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
- if (SDValue SD =
- reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
- return SD;
+ // TODO: Change reassociateReduction to support vp ops.
+ if (!IsVP)
+ if (SDValue SD =
+ reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
+ return SD;
// Simplify the operands using demanded-bits information.
if (SimplifyDemandedBits(SDValue(N, 0)))
@@ -26421,6 +26433,10 @@ SDValue DAGCombiner::visitVPOp(SDNode *N) {
return visitFMA<VPMatchContext>(N);
case ISD::VP_SELECT:
return visitVP_SELECT(N);
+ case ISD::VP_MUL:
+ return visitMUL<VPMatchContext>(N);
+ default:
+ break;
}
return SDValue();
}
@@ -27578,6 +27594,10 @@ static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
if (!VT.isVector())
return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
// We need to create a build vector
+ if (Op.getOpcode() == ISD::SPLAT_VECTOR)
+ return DAG.getSplat(VT, DL,
+ DAG.getConstant(Pow2Constants.back().logBase2(), DL,
+ VT.getScalarType()));
SmallVector<SDValue> Log2Ops;
for (const APInt &Pow2 : Pow2Constants)
Log2Ops.emplace_back(
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
index fbb97d6bf3229..24555af611cd0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
@@ -989,3 +989,285 @@ define <16 x i64> @vmul_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %
%v = call <16 x i64> @llvm.vp.mul.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
ret <16 x i64> %v
}
+
+
+define <8 x i64> @vmul_vv_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vmul_vv_undef_v8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmul_vv_undef_v8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: ret
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> undef, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vmul_vx_undef_v8i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmul_vx_undef_v8i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> undef, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_zero_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vmul_vx_zero_v8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmul_vx_zero_v8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 0, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_zero_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vmul_vx_zero_v8i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmul_vx_zero_v8i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 0, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_one_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_one_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 1, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_one_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_one_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 1, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_negone_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_negone_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vrsub.vi v8, v8, 0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_negone_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_negone_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vrsub.vi v8, v8, 0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_pow2_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_pow2_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsll.vi v8, v8, 6, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 64, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_pow2_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_pow2_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsll.vi v8, v8, 6
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 64, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_negpow2_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_negpow2_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsll.vi v8, v8, 6, v0.t
+; CHECK-NEXT: vrsub.vi v8, v8, 0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 -64, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_negpow2_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_negpow2_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsll.vi v8, v8, 6
+; CHECK-NEXT: vrsub.vi v8, v8, 0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 -64, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+declare <8 x i64> @llvm.vp.shl.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32)
+
+define <8 x i64> @vmul_vshl_vx_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vshl_vx_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsll.vi v8, v8, 3, v0.t
+; CHECK-NEXT: li a0, 7
+; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head1 = insertelement <8 x i64> poison, i64 3, i32 0
+ %vb = shufflevector <8 x i64> %elt.head1, <8 x i64> poison, <8 x i32> zeroinitializer
+ %elt.head2 = insertelement <8 x i64> poison, i64 7, i32 0
+ %vc = shufflevector <8 x i64> %elt.head2, <8 x i64> poison, <8 x i32> zeroinitializer
+ %vshl = call <8 x i64> @llvm.vp.shl.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %vshl, <8 x i64> %vc, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vshl_vx_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vshl_vx_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 56
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %elt.head1 = insertelement <8 x i64> poison, i64 3, i32 0
+ %vb = shufflevector <8 x i64> %elt.head1, <8 x i64> poison, <8 x i32> zeroinitializer
+ %elt.head2 = insertelement <8 x i64> poison, i64 7, i32 0
+ %vc = shufflevector <8 x i64> %elt.head2, <8 x i64> poison, <8 x i32> zeroinitializer
+ %vshl = call <8 x i64> @llvm.vp.shl.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %vshl, <8 x i64> %vc, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vshl_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vshl_vv_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0,...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/80105