[llvm] efedcbe - [RISCV] Fold ops into vmv.v.v as vmerge with all-ones mask
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 19 09:24:51 PDT 2023
Author: Luke Lau
Date: 2023-07-19T17:24:42+01:00
New Revision: efedcbeeb88c3cfd6fffb861067e36c43476a002
URL: https://github.com/llvm/llvm-project/commit/efedcbeeb88c3cfd6fffb861067e36c43476a002
DIFF: https://github.com/llvm/llvm-project/commit/efedcbeeb88c3cfd6fffb861067e36c43476a002.diff
LOG: [RISCV] Fold ops into vmv.v.v as vmerge with all-ones mask
A vmv.v.v shares the same encoding as an unmasked vmerge, so we can also fold
operations into it by treating it as a vmerge with an all-ones mask. We take
care here not to actually rewrite the existing vmv into a vmerge, since doing
so would make checks like True.hasOneUse() inaccurate; instead this just
returns an equivalent list of operands.
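For illustration only, here is a sketch of the equivalence being relied on,
using simplified operand lists rather than the exact pseudo-instruction
signatures:

    vmv.v.v    vd, vs          ; vd[i] = vs[i]                 for i < vl
    vmerge.vvm vd, vd, vs, v0  ; vd[i] = v0[i] ? vs[i] : vd[i] for i < vl

With v0 all ones the two writes are identical, so the existing vmerge
peephole can fold an unmasked producer of vs directly into the destination
once an all-ones mask (and its copy to V0 plus glue) is materialized.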
This is an alternative to D153351.
Reviewed By: reames
Differential Revision: https://reviews.llvm.org/D155101
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll
llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector-shuffle.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 054ed96c050742..d36492ac67809a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3123,15 +3123,14 @@ bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
return false;
}
-// Return true if we can make sure mask of N is all-ones mask.
-static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
+static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
// Check that we're using V0 as a mask register.
- if (!isa<RegisterSDNode>(N->getOperand(MaskOpIdx)) ||
- cast<RegisterSDNode>(N->getOperand(MaskOpIdx))->getReg() != RISCV::V0)
+ if (!isa<RegisterSDNode>(MaskOp) ||
+ cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0)
return false;
// The glued user defines V0.
- const auto *Glued = N->getGluedNode();
+ const auto *Glued = GlueOp.getNode();
if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
return false;
@@ -3158,6 +3157,12 @@ static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
IsVMSet(MaskSetter.getMachineOpcode());
}
+// Return true if we can make sure mask of N is all-ones mask.
+static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
+ return usesAllOnesMask(N->getOperand(MaskOpIdx),
+ N->getOperand(N->getNumOperands() - 1));
+}
+
static bool isImplicitDef(SDValue V) {
return V.isMachineOpcode() &&
V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
@@ -3213,6 +3218,45 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
return true;
}
+static bool IsVMerge(SDNode *N) {
+ unsigned Opc = N->getMachineOpcode();
+ return Opc == RISCV::PseudoVMERGE_VVM_MF8 ||
+ Opc == RISCV::PseudoVMERGE_VVM_MF4 ||
+ Opc == RISCV::PseudoVMERGE_VVM_MF2 ||
+ Opc == RISCV::PseudoVMERGE_VVM_M1 ||
+ Opc == RISCV::PseudoVMERGE_VVM_M2 ||
+ Opc == RISCV::PseudoVMERGE_VVM_M4 || Opc == RISCV::PseudoVMERGE_VVM_M8;
+}
+
+static bool IsVMv(SDNode *N) {
+ unsigned Opc = N->getMachineOpcode();
+ return Opc == RISCV::PseudoVMV_V_V_MF8 || Opc == RISCV::PseudoVMV_V_V_MF4 ||
+ Opc == RISCV::PseudoVMV_V_V_MF2 || Opc == RISCV::PseudoVMV_V_V_M1 ||
+ Opc == RISCV::PseudoVMV_V_V_M2 || Opc == RISCV::PseudoVMV_V_V_M4 ||
+ Opc == RISCV::PseudoVMV_V_V_M8;
+}
+
+static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) {
+ switch (LMUL) {
+ case RISCVII::LMUL_F8:
+ return RISCV::PseudoVMSET_M_B1;
+ case RISCVII::LMUL_F4:
+ return RISCV::PseudoVMSET_M_B2;
+ case RISCVII::LMUL_F2:
+ return RISCV::PseudoVMSET_M_B4;
+ case RISCVII::LMUL_1:
+ return RISCV::PseudoVMSET_M_B8;
+ case RISCVII::LMUL_2:
+ return RISCV::PseudoVMSET_M_B16;
+ case RISCVII::LMUL_4:
+ return RISCV::PseudoVMSET_M_B32;
+ case RISCVII::LMUL_8:
+ return RISCV::PseudoVMSET_M_B64;
+ case RISCVII::LMUL_RESERVED:
+ llvm_unreachable("Unexpected LMUL");
+ }
+}
+
// Try to fold away VMERGE_VVM instructions. We handle these cases:
// -Masked TU VMERGE_VVM combined with an unmasked TA instruction instruction
// folds to a masked TU instruction. VMERGE_VVM must have have merge operand
@@ -3227,16 +3271,27 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
// form with an IMPLICIT_DEF passthrough operand or the unsuffixed (TA) pseudo
// form.
bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
-
- SDValue Merge = N->getOperand(0);
- SDValue False = N->getOperand(1);
- SDValue True = N->getOperand(2);
- SDValue Mask = N->getOperand(3);
- SDValue VL = N->getOperand(4);
- // We always have a glue node for the mask at v0
- assert(cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
- SDValue Glue = N->getOperand(N->getNumOperands() - 1);
- assert(Glue.getValueType() == MVT::Glue);
+ SDValue Merge, False, True, VL, Mask, Glue;
+ // A vmv.v.v is equivalent to a vmerge with an all-ones mask.
+ if (IsVMv(N)) {
+ Merge = N->getOperand(0);
+ False = N->getOperand(0);
+ True = N->getOperand(1);
+ VL = N->getOperand(2);
+ // A vmv.v.v won't have a Mask or Glue, instead we'll construct an all-ones
+ // mask later below.
+ } else {
+ assert(IsVMerge(N));
+ Merge = N->getOperand(0);
+ False = N->getOperand(1);
+ True = N->getOperand(2);
+ Mask = N->getOperand(3);
+ VL = N->getOperand(4);
+ // We always have a glue node for the mask at v0.
+ Glue = N->getOperand(N->getNumOperands() - 1);
+ }
+ assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
+ assert(!Glue || Glue.getValueType() == MVT::Glue);
// We require that either merge and false are the same, or that merge
// is undefined.
@@ -3291,7 +3346,7 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
// the mask from the True instruction.
// FIXME: Support mask agnostic True instruction which would have an
// undef merge operand.
- if (!usesAllOnesMask(N, /* MaskOpIdx */ 3))
+ if (Mask && !usesAllOnesMask(Mask, Glue))
return false;
}
@@ -3315,9 +3370,11 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
SmallVector<const SDNode *, 4> LoopWorklist;
SmallPtrSet<const SDNode *, 16> Visited;
LoopWorklist.push_back(False.getNode());
- LoopWorklist.push_back(Mask.getNode());
+ if (Mask)
+ LoopWorklist.push_back(Mask.getNode());
LoopWorklist.push_back(VL.getNode());
- LoopWorklist.push_back(Glue.getNode());
+ if (Glue)
+ LoopWorklist.push_back(Glue.getNode());
if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
return false;
}
@@ -3327,6 +3384,7 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
unsigned TrueVLIndex =
True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
SDValue TrueVL = True.getOperand(TrueVLIndex);
+ SDValue SEW = True.getOperand(TrueVLIndex + 1);
auto GetMinVL = [](SDValue LHS, SDValue RHS) {
if (LHS == RHS)
@@ -3356,6 +3414,8 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
!True->getFlags().hasNoFPExcept())
return false;
+ SDLoc DL(N);
+
// From the preconditions we checked above, we know the mask and thus glue
// for the result node will be taken from True.
if (IsMasked) {
@@ -3363,8 +3423,22 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
Glue = True->getOperand(True->getNumOperands() - 1);
assert(Glue.getValueType() == MVT::Glue);
}
+ // If we end up using the vmerge mask the vmerge is actually a vmv.v.v, create
+ // an all-ones mask to use.
+ else if (IsVMv(N)) {
+ unsigned TSFlags = TII->get(N->getMachineOpcode()).TSFlags;
+ unsigned VMSetOpc = GetVMSetForLMul(RISCVII::getLMul(TSFlags));
+ ElementCount EC = N->getValueType(0).getVectorElementCount();
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, EC);
+
+ SDValue AllOnesMask =
+ SDValue(CurDAG->getMachineNode(VMSetOpc, DL, MaskVT, VL, SEW), 0);
+ SDValue MaskCopy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
+ RISCV::V0, AllOnesMask, SDValue());
+ Mask = CurDAG->getRegister(RISCV::V0, MaskVT);
+ Glue = MaskCopy.getValue(1);
+ }
- SDLoc DL(N);
unsigned MaskedOpc = Info->MaskedPseudo;
#ifndef NDEBUG
const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
@@ -3375,10 +3449,7 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
"Expected instructions with mask have a tied dest.");
#endif
- SDValue SEW = True.getOperand(TrueVLIndex + 1);
-
- uint64_t Policy = isImplicitDef(N->getOperand(0)) ?
- RISCVII::TAIL_AGNOSTIC : /*TUMU*/ 0;
+ uint64_t Policy = isImplicitDef(Merge) ? RISCVII::TAIL_AGNOSTIC : /*TUMU*/ 0;
SDValue PolicyOp =
CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());
@@ -3468,20 +3539,9 @@ bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
if (N->use_empty() || !N->isMachineOpcode())
continue;
- auto IsVMerge = [](unsigned Opcode) {
- return Opcode == RISCV::PseudoVMERGE_VVM_MF8 ||
- Opcode == RISCV::PseudoVMERGE_VVM_MF4 ||
- Opcode == RISCV::PseudoVMERGE_VVM_MF2 ||
- Opcode == RISCV::PseudoVMERGE_VVM_M1 ||
- Opcode == RISCV::PseudoVMERGE_VVM_M2 ||
- Opcode == RISCV::PseudoVMERGE_VVM_M4 ||
- Opcode == RISCV::PseudoVMERGE_VVM_M8;
- };
-
- unsigned Opc = N->getMachineOpcode();
- if (IsVMerge(Opc))
+ if (IsVMerge(N) || IsVMv(N))
MadeChange |= performCombineVMergeAndVOps(N);
- if (IsVMerge(Opc) && N->getOperand(0) == N->getOperand(1))
+ if (IsVMerge(N) && N->getOperand(0) == N->getOperand(1))
MadeChange |= performVMergeToVMv(N);
}
return MadeChange;
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll b/llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll
index e5cd47ee2ea8c7..682ad576867240 100644
--- a/llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll
@@ -80,10 +80,8 @@ declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32(<vscale x 4 x i32>, ptr, iXLe
define <vscale x 4 x i32> @foldable_load(<vscale x 4 x i32> %passthru, ptr %p) {
; CHECK-LABEL: foldable_load:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT: vle32.v v10, (a0)
; CHECK-NEXT: vsetivli zero, 2, e32, m2, tu, ma
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: ret
%v = call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32(<vscale x 4 x i32> poison, ptr %p, iXLen 4)
%w = call <vscale x 4 x i32> @llvm.riscv.vmv.v.v.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, iXLen 2)
diff --git a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
index b584fdafdf1cc9..6f05b8018f42b9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
@@ -470,13 +470,12 @@ define <vscale x 6 x half> @extract_nxv6f16_nxv12f16_6(<vscale x 12 x half> %in)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v11, v10, a0
; CHECK-NEXT: vslidedown.vx v8, v9, a0
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
-; CHECK-NEXT: vmv.v.v v9, v11
; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vx v8, v10, a0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vslidedown.vx v9, v10, a0
; CHECK-NEXT: ret
%res = call <vscale x 6 x half> @llvm.vector.extract.nxv6f16.nxv12f16(<vscale x 12 x half> %in, i64 6)
ret <vscale x 6 x half> %res
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector-shuffle.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector-shuffle.ll
index dc7c907d677974..64e1bf3e6c0324 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector-shuffle.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector-shuffle.ll
@@ -5,10 +5,8 @@
define <4 x i32> @insert_subvector_load_v4i32_v4i32(<4 x i32> %v1, ptr %p) {
; CHECK-LABEL: insert_subvector_load_v4i32_v4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v9, (a0)
; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: ret
%v2 = load <4 x i32>, ptr %p
%v3 = shufflevector <4 x i32> %v2, <4 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
@@ -19,10 +17,8 @@ declare <4 x i32> @llvm.vp.load.v4i32(ptr, <4 x i1>, i32)
define <4 x i32> @insert_subvector_vp_load_v4i32_v4i32(<4 x i32> %v1, ptr %p, <4 x i1> %mask) {
; CHECK-LABEL: insert_subvector_vp_load_v4i32_v4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v9, (a0), v0.t
-; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu
+; CHECK-NEXT: vle32.v v8, (a0), v0.t
; CHECK-NEXT: ret
%v2 = call <4 x i32> @llvm.vp.load.v4i32(ptr %p, <4 x i1> %mask, i32 4)
%v3 = shufflevector <4 x i32> %v2, <4 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
@@ -48,11 +44,8 @@ define <4 x i32> @insert_subvector_load_unfoldable_passthru_v4i32_v4i32(<4 x i32
define <4 x i32> @insert_subvector_load_foldable_passthru_v4i32_v4i32(<4 x i32> %v1, ptr %p, <4 x i1> %mask) {
; CHECK-LABEL: insert_subvector_load_foldable_passthru_v4i32_v4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vmv1r.v v9, v8
-; CHECK-NEXT: vle32.v v9, (a0), v0.t
-; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu
+; CHECK-NEXT: vle32.v v8, (a0), v0.t
; CHECK-NEXT: ret
%v2 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %p, i32 4, <4 x i1> %mask, <4 x i32> %v1)
%v3 = shufflevector <4 x i32> %v2, <4 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
@@ -64,9 +57,8 @@ define <4 x i32> @insert_subvector_add_v4i32_v4i32(<4 x i32> %v1, <4 x i32> %v2)
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vid.v v10
-; CHECK-NEXT: vadd.vv v9, v9, v10
; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: vadd.vv v8, v9, v10
; CHECK-NEXT: ret
%v3 = add <4 x i32> %v2, <i32 0, i32 1, i32 2, i32 3>
%v4 = shufflevector <4 x i32> %v3, <4 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
@@ -77,10 +69,8 @@ declare <4 x i32> @llvm.vp.add.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
define <4 x i32> @insert_subvector_vp_add_v4i32_v4i32(<4 x i32> %v1, <4 x i32> %v2, <4 x i1> %mask) {
; CHECK-LABEL: insert_subvector_vp_add_v4i32_v4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vadd.vi v9, v9, 1, v0.t
-; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu
+; CHECK-NEXT: vadd.vi v8, v9, 1, v0.t
; CHECK-NEXT: ret
%v3 = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %v2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i1> %mask, i32 4)
%v4 = shufflevector <4 x i32> %v3, <4 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
@@ -149,10 +139,8 @@ define <4 x i32> @insert_subvector_vp_add_v4i32_v2i32(<4 x i32> %v1, <2 x i32> %
define <4 x i32> @insert_subvector_load_v4i32_v8i32(<4 x i32> %v1, ptr %p) {
; CHECK-LABEL: insert_subvector_load_v4i32_v8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v9, (a0)
; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: ret
%v2 = load <8 x i32>, ptr %p
%v3 = shufflevector <8 x i32> %v2, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
@@ -180,9 +168,8 @@ define <4 x i32> @insert_subvector_add_v4i32_v8i32(<4 x i32> %v1, <8 x i32> %v2)
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: vadd.vv v9, v10, v9
; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: vadd.vv v8, v10, v9
; CHECK-NEXT: ret
%v3 = add <8 x i32> %v2, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%v4 = shufflevector <8 x i32> %v3, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>