[llvm] d876214 - [RISCV] Begin to support more subvector inserts/extracts
Fraser Cormack via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 18 02:24:38 PST 2021
Author: Fraser Cormack
Date: 2021-02-18T10:18:27Z
New Revision: d876214990303e07310fb9f7a13b37715f051006
URL: https://github.com/llvm/llvm-project/commit/d876214990303e07310fb9f7a13b37715f051006
DIFF: https://github.com/llvm/llvm-project/commit/d876214990303e07310fb9f7a13b37715f051006.diff
LOG: [RISCV] Begin to support more subvector inserts/extracts
This patch adds support for INSERT_SUBVECTOR and EXTRACT_SUBVECTOR
(nominally where both operands are scalable vector types) where the
vector, subvector, and index align sufficiently to allow decomposition
to subregister manipulation:
* For extracts, the extracted subvector must correctly align with the
lower elements of a vector register.
* For inserts, the inserted subvector must be at least one full vector
register, and correctly align as above.
This approach should work for fixed-length vector insertion/extraction
too, but that will come later.
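As a quick illustration (not part of the patch itself; it is drawn from the
extract_nxv8i32_nxv2i32_4 test added below, with an illustrative function
name), an extract whose subvector lines up exactly with one vector register
of the LMUL group now selects to a single whole-register move:

  define <vscale x 2 x i32> @example(<vscale x 8 x i32> %vec) {
    ; nxv2i32 occupies one VR; index 4 lands on the third register of the
    ; LMUL=4 group holding %vec, so this selects to: vmv1r.v v8, v10
    %c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, i64 4)
    ret <vscale x 2 x i32> %c
  }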
Reviewed By: craig.topper, khchen, arcbbb
Differential Revision: https://reviews.llvm.org/D96873
Added:
llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
Modified:
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 2c82704428ff..1c3d0cfc2fbb 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -382,6 +382,48 @@ void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned IntNo,
ReplaceNode(Node, Store);
}
+static unsigned getRegClassIDForVecVT(MVT VT) {
+ if (VT.getVectorElementType() == MVT::i1)
+ return RISCV::VRRegClassID;
+ return getRegClassIDForLMUL(getLMUL(VT));
+}
+
+// Attempt to decompose a subvector insert/extract between VecVT and
+// SubVecVT via subregister indices. Returns the subregister index that
+// can perform the subvector insert/extract with the given element index, as
+// well as the index corresponding to any leftover subvectors that must be
+// further inserted/extracted within the register class for SubVecVT.
+static std::pair<unsigned, unsigned>
+decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT,
+ unsigned InsertExtractIdx,
+ const RISCVRegisterInfo *TRI) {
+ static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
+ RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
+ RISCV::VRM2RegClassID > RISCV::VRRegClassID),
+ "Register classes not ordered");
+ unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
+ unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
+ // Try to compose a subregister index that takes us from the incoming
+ // LMUL>1 register class down to the outgoing one. At each step we halve
+ // the LMUL:
+ // nxv16i32 at 12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
+ // Note that this is not guaranteed to find a subregister index, such as
+ // when we are extracting from one VR type to another.
+ unsigned SubRegIdx = RISCV::NoSubRegister;
+ for (const unsigned RCID :
+ {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
+ if (VecRegClassID > RCID && SubRegClassID <= RCID) {
+ VecVT = VecVT.getHalfNumVectorElementsVT();
+ bool IsHi =
+ InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
+ SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
+ getSubregIndexByMVT(VecVT, IsHi));
+ if (IsHi)
+ InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
+ }
+ return {SubRegIdx, InsertExtractIdx};
+}
+
void RISCVDAGToDAGISel::Select(SDNode *Node) {
// If we have a custom node, we have already selected.
if (Node->isMachineOpcode()) {
@@ -704,56 +746,127 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
break;
}
case ISD::INSERT_SUBVECTOR: {
- // Bail when not a "cast" like insert_subvector.
- if (Node->getConstantOperandVal(2) != 0)
- break;
- if (!Node->getOperand(0).isUndef())
- break;
+ SDValue V = Node->getOperand(0);
+ SDValue SubV = Node->getOperand(1);
+ SDLoc DL(SubV);
+ auto Idx = Node->getConstantOperandVal(2);
+ MVT SubVecVT = Node->getOperand(1).getSimpleValueType();
+
+ // TODO: This method of selecting INSERT_SUBVECTOR should work
+ // with any type of insertion (fixed <-> scalable) but we don't yet
+ // correctly identify the canonical register class for fixed-length types.
+ // For now, keep the two paths separate.
+ if (VT.isScalableVector() && SubVecVT.isScalableVector()) {
+ bool IsFullVecReg = false;
+ switch (getLMUL(SubVecVT)) {
+ default:
+ break;
+ case RISCVVLMUL::LMUL_1:
+ case RISCVVLMUL::LMUL_2:
+ case RISCVVLMUL::LMUL_4:
+ case RISCVVLMUL::LMUL_8:
+ IsFullVecReg = true;
+ break;
+ }
- // Bail when normal isel should do the job.
- MVT InVT = Node->getOperand(1).getSimpleValueType();
- if (VT.isFixedLengthVector() || InVT.isScalableVector())
- break;
+ // If the subvector doesn't occupy a full vector register then we can't
+ // insert it purely using subregister manipulation. We must not clobber
+ // the untouched elements (say, in the upper half of the VR register).
+ if (!IsFullVecReg)
+ break;
- unsigned RegClassID;
- if (VT.getVectorElementType() == MVT::i1)
- RegClassID = RISCV::VRRegClassID;
- else
- RegClassID = getRegClassIDForLMUL(getLMUL(VT));
+ const auto *TRI = Subtarget->getRegisterInfo();
+ unsigned SubRegIdx;
+ std::tie(SubRegIdx, Idx) =
+ decomposeSubvectorInsertExtractToSubRegs(VT, SubVecVT, Idx, TRI);
- SDValue V = Node->getOperand(1);
- SDLoc DL(V);
- SDValue RC =
- CurDAG->getTargetConstant(RegClassID, DL, Subtarget->getXLenVT());
- SDNode *NewNode =
- CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
- ReplaceNode(Node, NewNode);
- return;
- }
- case ISD::EXTRACT_SUBVECTOR: {
- // Bail when not a "cast" like extract_subvector.
- if (Node->getConstantOperandVal(1) != 0)
- break;
+ // If the Idx hasn't been completely eliminated then this is a subvector
+ // insert which doesn't naturally align to a vector register. These must
+ // be handled using instructions to manipulate the vector registers.
+ if (Idx != 0)
+ break;
- // Bail when normal isel can do the job.
- MVT InVT = Node->getOperand(0).getSimpleValueType();
- if (VT.isScalableVector() || InVT.isFixedLengthVector())
- break;
+ SDNode *NewNode = CurDAG->getMachineNode(
+ TargetOpcode::INSERT_SUBREG, DL, VT, V, SubV,
+ CurDAG->getTargetConstant(SubRegIdx, DL, Subtarget->getXLenVT()));
+ return ReplaceNode(Node, NewNode);
+ }
- unsigned RegClassID;
- if (InVT.getVectorElementType() == MVT::i1)
- RegClassID = RISCV::VRRegClassID;
- else
- RegClassID = getRegClassIDForLMUL(getLMUL(InVT));
+ if (VT.isScalableVector() && SubVecVT.isFixedLengthVector()) {
+ // Bail when not a "cast" like insert_subvector.
+ if (Idx != 0)
+ break;
+ if (!Node->getOperand(0).isUndef())
+ break;
+ unsigned RegClassID = getRegClassIDForVecVT(VT);
+
+ SDValue RC =
+ CurDAG->getTargetConstant(RegClassID, DL, Subtarget->getXLenVT());
+ SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ DL, VT, SubV, RC);
+ ReplaceNode(Node, NewNode);
+ return;
+ }
+ break;
+ }
+ case ISD::EXTRACT_SUBVECTOR: {
SDValue V = Node->getOperand(0);
+ auto Idx = Node->getConstantOperandVal(1);
+ MVT InVT = Node->getOperand(0).getSimpleValueType();
SDLoc DL(V);
- SDValue RC =
- CurDAG->getTargetConstant(RegClassID, DL, Subtarget->getXLenVT());
- SDNode *NewNode =
- CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
- ReplaceNode(Node, NewNode);
- return;
+
+ // TODO: This method of selecting EXTRACT_SUBVECTOR should work
+ // with any type of extraction (fixed <-> scalable) but we don't yet
+ // correctly identify the canonical register class for fixed-length types.
+ // For now, keep the two paths separate.
+ if (VT.isScalableVector() && InVT.isScalableVector()) {
+ const auto *TRI = Subtarget->getRegisterInfo();
+ unsigned SubRegIdx;
+ std::tie(SubRegIdx, Idx) =
+ decomposeSubvectorInsertExtractToSubRegs(InVT, VT, Idx, TRI);
+
+ // If the Idx hasn't been completely eliminated then this is a subvector
+ // extract which doesn't naturally align to a vector register. These must
+ // be handled using instructions to manipulate the vector registers.
+ if (Idx != 0)
+ break;
+
+ // If we haven't set a SubRegIdx, then we must be going between LMUL<=1
+ // types (VR -> VR). This can be done as a copy.
+ if (SubRegIdx == RISCV::NoSubRegister) {
+ unsigned RegClassID = getRegClassIDForVecVT(VT);
+ unsigned InRegClassID = getRegClassIDForVecVT(InVT);
+ assert(RegClassID == InRegClassID &&
+ RegClassID == RISCV::VRRegClassID &&
+ "Unexpected subvector extraction");
+ SDValue RC =
+ CurDAG->getTargetConstant(InRegClassID, DL, Subtarget->getXLenVT());
+ SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ DL, VT, V, RC);
+ return ReplaceNode(Node, NewNode);
+ }
+ SDNode *NewNode = CurDAG->getMachineNode(
+ TargetOpcode::EXTRACT_SUBREG, DL, VT, V,
+ CurDAG->getTargetConstant(SubRegIdx, DL, Subtarget->getXLenVT()));
+ return ReplaceNode(Node, NewNode);
+ }
+
+ if (VT.isFixedLengthVector() && InVT.isScalableVector()) {
+ // Bail when not a "cast" like extract_subvector.
+ if (Idx != 0)
+ break;
+
+ unsigned InRegClassID = getRegClassIDForVecVT(InVT);
+
+ SDValue RC =
+ CurDAG->getTargetConstant(InRegClassID, DL, Subtarget->getXLenVT());
+ SDNode *NewNode =
+ CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
+ ReplaceNode(Node, NewNode);
+ return;
+ }
+ break;
}
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
new file mode 100644
index 000000000000..c14abab5440f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
@@ -0,0 +1,226 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 4 x i32> @extract_nxv8i32_nxv4i32_0(<vscale x 8 x i32> %vec) {
+; CHECK-LABEL: extract_nxv8i32_nxv4i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $v8m2 killed $v8m2 killed $v8m4
+; CHECK-NEXT: ret
+ %c = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> %vec, i64 0)
+ ret <vscale x 4 x i32> %c
+}
+
+define <vscale x 4 x i32> @extract_nxv8i32_nxv4i32_4(<vscale x 8 x i32> %vec) {
+; CHECK-LABEL: extract_nxv8i32_nxv4i32_4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: ret
+ %c = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> %vec, i64 4)
+ ret <vscale x 4 x i32> %c
+}
+
+define <vscale x 2 x i32> @extract_nxv8i32_nxv2i32_0(<vscale x 8 x i32> %vec) {
+; CHECK-LABEL: extract_nxv8i32_nxv2i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v8m4
+; CHECK-NEXT: ret
+ %c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, i64 0)
+ ret <vscale x 2 x i32> %c
+}
+
+define <vscale x 2 x i32> @extract_nxv8i32_nxv2i32_2(<vscale x 8 x i32> %vec) {
+; CHECK-LABEL: extract_nxv8i32_nxv2i32_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, i64 2)
+ ret <vscale x 2 x i32> %c
+}
+
+define <vscale x 2 x i32> @extract_nxv8i32_nxv2i32_4(<vscale x 8 x i32> %vec) {
+; CHECK-LABEL: extract_nxv8i32_nxv2i32_4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+ %c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, i64 4)
+ ret <vscale x 2 x i32> %c
+}
+
+define <vscale x 2 x i32> @extract_nxv8i32_nxv2i32_6(<vscale x 8 x i32> %vec) {
+; CHECK-LABEL: extract_nxv8i32_nxv2i32_6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v8, v11
+; CHECK-NEXT: ret
+ %c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, i64 6)
+ ret <vscale x 2 x i32> %c
+}
+
+define <vscale x 8 x i32> @extract_nxv16i32_nxv8i32_0(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv8i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $v8m4 killed $v8m4 killed $v8m8
+; CHECK-NEXT: ret
+ %c = call <vscale x 8 x i32> @llvm.experimental.vector.extract.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0)
+ ret <vscale x 8 x i32> %c
+}
+
+define <vscale x 8 x i32> @extract_nxv16i32_nxv8i32_8(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv8i32_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: ret
+ %c = call <vscale x 8 x i32> @llvm.experimental.vector.extract.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, i64 8)
+ ret <vscale x 8 x i32> %c
+}
+
+define <vscale x 4 x i32> @extract_nxv16i32_nxv4i32_0(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv4i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $v8m2 killed $v8m2 killed $v8m8
+; CHECK-NEXT: ret
+ %c = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0)
+ ret <vscale x 4 x i32> %c
+}
+
+define <vscale x 4 x i32> @extract_nxv16i32_nxv4i32_4(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv4i32_4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: ret
+ %c = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, i64 4)
+ ret <vscale x 4 x i32> %c
+}
+
+define <vscale x 4 x i32> @extract_nxv16i32_nxv4i32_8(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv4i32_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: ret
+ %c = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, i64 8)
+ ret <vscale x 4 x i32> %c
+}
+
+define <vscale x 4 x i32> @extract_nxv16i32_nxv4i32_12(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv4i32_12:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv2r.v v8, v14
+; CHECK-NEXT: ret
+ %c = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, i64 12)
+ ret <vscale x 4 x i32> %c
+}
+
+define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_0(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv2i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v8m8
+; CHECK-NEXT: ret
+ %c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0)
+ ret <vscale x 2 x i32> %c
+}
+
+define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_2(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv2i32_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 2)
+ ret <vscale x 2 x i32> %c
+}
+
+define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_4(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv2i32_4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+ %c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 4)
+ ret <vscale x 2 x i32> %c
+}
+
+define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_6(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv2i32_6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v8, v11
+; CHECK-NEXT: ret
+ %c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 6)
+ ret <vscale x 2 x i32> %c
+}
+
+define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_8(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv2i32_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v8, v12
+; CHECK-NEXT: ret
+ %c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 8)
+ ret <vscale x 2 x i32> %c
+}
+
+define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_10(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv2i32_10:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v8, v13
+; CHECK-NEXT: ret
+ %c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 10)
+ ret <vscale x 2 x i32> %c
+}
+
+define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_12(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv2i32_12:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v8, v14
+; CHECK-NEXT: ret
+ %c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 12)
+ ret <vscale x 2 x i32> %c
+}
+
+define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_14(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv2i32_14:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v8, v15
+; CHECK-NEXT: ret
+ %c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 14)
+ ret <vscale x 2 x i32> %c
+}
+
+define <vscale x 1 x i32> @extract_nxv16i32_nxv1i32_0(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv1i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v8m8
+; CHECK-NEXT: ret
+ %c = call <vscale x 1 x i32> @llvm.experimental.vector.extract.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0)
+ ret <vscale x 1 x i32> %c
+}
+
+; TODO: Extracts that don't align to a vector register are not yet supported.
+; In this case we want to extract the upper half of the lowest VR subregister
+; in the LMUL group.
+; define <vscale x 1 x i32> @extract_nxv16i32_nxv1i32_1(<vscale x 16 x i32> %vec) {
+; %c = call <vscale x 1 x i32> @llvm.experimental.vector.extract.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, i64 1)
+; ret <vscale x 1 x i32> %c
+; }
+
+define <vscale x 1 x i32> @extract_nxv16i32_nxv1i32_2(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv1i32_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %c = call <vscale x 1 x i32> @llvm.experimental.vector.extract.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, i64 2)
+ ret <vscale x 1 x i32> %c
+}
+
+define <vscale x 1 x i32> @extract_nxv2i32_nxv1i32_0(<vscale x 2 x i32> %vec) {
+; CHECK-LABEL: extract_nxv2i32_nxv1i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %c = call <vscale x 1 x i32> @llvm.experimental.vector.extract.nxv1i32.nxv2i32(<vscale x 2 x i32> %vec, i64 0)
+ ret <vscale x 1 x i32> %c
+}
+
+declare <vscale x 1 x i32> @llvm.experimental.vector.extract.nxv1i32.nxv2i32(<vscale x 2 x i32> %vec, i64 %idx)
+
+declare <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, i64 %idx)
+declare <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> %vec, i64 %idx)
+
+declare <vscale x 1 x i32> @llvm.experimental.vector.extract.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, i64 %idx)
+declare <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 %idx)
+declare <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, i64 %idx)
+declare <vscale x 8 x i32> @llvm.experimental.vector.extract.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, i64 %idx)
diff --git a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
new file mode 100644
index 000000000000..6538ec5dd06c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
@@ -0,0 +1,206 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv4i32_0(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv4i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec, i64 0)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv4i32_4(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv4i32_4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv2r.v v10, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec, i64 4)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_0(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv2i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 0)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_2(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv2i32_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v9, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 2)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_4(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv2i32_4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 4)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_6(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv2i32_6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v11, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 6)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv8i32_0(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv8i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec, i64 0)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv8i32_8(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv8i32_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv4r.v v12, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec, i64 8)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_0(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv4i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv2r.v v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 0)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_4(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv4i32_4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv2r.v v10, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 4)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_8(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv4i32_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv2r.v v12, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 8)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_12(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv4i32_12:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv2r.v v14, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 12)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_0(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 0)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_2(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v9, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 2)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_4(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 4)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_6(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v11, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 6)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_8(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v12, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 8)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_10(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_10:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v13, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 10)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_12(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_12:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v14, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 12)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_14(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_14:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v15, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 14)
+ ret <vscale x 16 x i32> %v
+}
+
+; TODO: Inserts that are less than LMUL=1 are not yet supported. In this case
+; we need to mask out the unaffected elements (top half of the VR %subvec
+; register)
+;define <vscale x 16 x i32> @insert_nxv16i32_nxv1i32_0(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec) {
+; %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec, i64 0)
+; ret <vscale x 16 x i32> %v
+;}
+
+; TODO: Inserts that don't align to a vector register are not yet supported.
+; In this case we want to insert the subvector into the upper half of the
+; lowest VR subregister in the LMUL group.
+;define <vscale x 16 x i32> @insert_nxv16i32_nxv1i32_1(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec) {
+; %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec, i64 1)
+; ret <vscale x 16 x i32> %v
+;}
+
+declare <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32>, <vscale x 2 x i32>, i64 %idx)
+declare <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv8i32(<vscale x 8 x i32>, <vscale x 4 x i32>, i64 %idx)
+
+declare <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32>, <vscale x 1 x i32>, i64 %idx)
+declare <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32>, <vscale x 2 x i32>, i64 %idx)
+declare <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32>, <vscale x 4 x i32>, i64 %idx)
+declare <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv8i32.nxv16i32(<vscale x 16 x i32>, <vscale x 8 x i32>, i64 %idx)