[llvm] bd4d421 - [RISCV] Support EXTRACT_SUBVECTOR on vector masks
Fraser Cormack via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 1 03:26:40 PST 2021
Author: Fraser Cormack
Date: 2021-03-01T11:20:09Z
New Revision: bd4d4216881d2ddba170808a8cd2f7b75cbc8de9
URL: https://github.com/llvm/llvm-project/commit/bd4d4216881d2ddba170808a8cd2f7b75cbc8de9
DIFF: https://github.com/llvm/llvm-project/commit/bd4d4216881d2ddba170808a8cd2f7b75cbc8de9.diff
LOG: [RISCV] Support EXTRACT_SUBVECTOR on vector masks
This patch adds support for extracting subvectors from vector masks.
This can mean either extracting a scalable vector from another scalable
vector, or extracting a fixed-length vector from a fixed-length or scalable
vector.
Since RVV lacks a way to slide vector masks down on an element-wise
basis and we don't know the true length of the vector registers, in many
cases we must resort to bitcasting to equivalently-sized i8 vectors to
perform the operation. When this is not possible, we fall back to
zero-extending to a suitably-sized i8 vector.
Support was also added for fixed-length truncation to mask types.
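To make the two strategies concrete, here is a minimal scalar sketch in plain
C++ (illustrative only, assuming the mask is modelled as the low bits of a
uint64_t; extractAligned and extractViaWiden are names invented for this
sketch, not part of the patch or of any LLVM API):

#include <cassert>
#include <cstdint>
#include <vector>

// Byte-aligned case: treat groups of 8 mask bits as i8 lanes and take whole
// lanes (mirrors the bitcast-to-i8 path plus a slide down by Idx / 8 lanes).
uint64_t extractAligned(uint64_t Mask, unsigned Idx, unsigned Len) {
  assert(Idx % 8 == 0 && Len % 8 == 0 && Idx + Len <= 64);
  uint64_t Slid = Mask >> Idx;
  return Len == 64 ? Slid : (Slid & ((1ull << Len) - 1));
}

// General case: zero-extend each mask bit to an i8 element, extract at the
// element index, then compare the bytes against zero to re-form the mask
// (mirrors the ZERO_EXTEND + EXTRACT_SUBVECTOR + SETNE fallback).
uint64_t extractViaWiden(uint64_t Mask, unsigned Idx, unsigned Len) {
  assert(Idx + Len <= 64);
  std::vector<uint8_t> Bytes(64);
  for (unsigned i = 0; i < 64; ++i)
    Bytes[i] = (Mask >> i) & 1;                // i1 -> i8 zero-extension
  uint64_t Out = 0;
  for (unsigned i = 0; i < Len; ++i)
    Out |= uint64_t(Bytes[Idx + i] != 0) << i; // setne 0 gives back i1 bits
  return Out;
}

int main() {
  uint64_t M = 0xF0F0F0F0F0F0F0F0ull;
  // Both strategies agree, e.g. when extracting 8 elements at index 48.
  return extractAligned(M, 48, 8) == extractViaWiden(M, 48, 8) ? 0 : 1;
}

On RVV the aligned path resolves to a vslidedown over i8 elements, while the
fallback path corresponds to the vmv.v.i / vmerge.vim / vslidedown / vmsne.vi
sequences visible in the tests below.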
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D97475
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ac388f5c7b0b..197d8189b1ef 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -419,6 +419,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// Mask VTs are custom-expanded into a series of standard nodes
setOperationAction(ISD::TRUNCATE, VT, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
}
for (MVT VT : IntVecVTs) {
@@ -537,12 +538,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
+ setOperationAction(ISD::SETCC, VT, Custom);
+
+ setOperationAction(ISD::TRUNCATE, VT, Custom);
+
// Operations below are different for between masks and other vectors.
if (VT.getVectorElementType() == MVT::i1) {
setOperationAction(ISD::AND, VT, Custom);
setOperationAction(ISD::OR, VT, Custom);
setOperationAction(ISD::XOR, VT, Custom);
- setOperationAction(ISD::SETCC, VT, Custom);
continue;
}
@@ -578,7 +582,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VSELECT, VT, Custom);
- setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
@@ -2119,28 +2122,35 @@ SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
"Unexpected type for vector mask lowering");
SDValue Src = Op.getOperand(0);
- EVT VecVT = Src.getValueType();
-
- // Be careful not to introduce illegal scalar types at this stage, and be
- // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
- // illegal and must be expanded. Since we know that the constants are
- // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
- bool IsRV32E64 =
- !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
+ MVT VecVT = Src.getSimpleValueType();
+
+ // If this is a fixed vector, we need to convert it to a scalable vector.
+ MVT ContainerVT = VecVT;
+ if (VecVT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
+ Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
+ }
+
SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
- if (!IsRV32E64) {
- SplatOne = DAG.getSplatVector(VecVT, DL, SplatOne);
- SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
- } else {
- SplatOne = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatOne);
- SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
+ SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne);
+ SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero);
+
+ if (VecVT.isScalableVector()) {
+ SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
+ return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
}
- SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
+ SDValue Mask, VL;
+ std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
- return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
+ MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
+ SDValue Trunc =
+ DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL);
+ Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
+ DAG.getCondCode(ISD::SETNE), Mask, VL);
+ return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
}
SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
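As a hedged aside on the truncate lowering above: each source element is
conceptually reduced to its low bit, i.e. (x & 1) != 0, which the new code
expresses as a VMV_V_X_VL splat of 1, an AND_VL, and a SETCC_VL (SETNE)
against a splat of 0. A scalar sketch in plain C++ (illustrative names, not
LLVM API):

#include <cstdint>
#include <vector>

// Truncating an integer element to i1 keeps only bit 0 of that element.
std::vector<bool> truncToMask(const std::vector<uint64_t> &Src) {
  std::vector<bool> Mask(Src.size());
  for (size_t i = 0; i < Src.size(); ++i)
    Mask[i] = (Src[i] & 1) != 0;  // AND with splat(1), then SETNE splat(0)
  return Mask;
}

int main() {
  std::vector<bool> M = truncToMask({2, 3, 4, 5}); // {0, 1, 0, 1}
  return (!M[0] && M[1] && !M[2] && M[3]) ? 0 : 1;
}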
@@ -2511,6 +2521,43 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
unsigned OrigIdx = Op.getConstantOperandVal(1);
const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ // We don't have the ability to slide mask vectors down indexed by their i1
+ // elements; the smallest we can do is i8. Often we are able to bitcast to
+ // equivalent i8 vectors. Note that when extracting a fixed-length vector
+ // from a scalable one, we might not necessarily have enough scalable
+ // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
+ if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
+ if (VecVT.getVectorMinNumElements() >= 8 &&
+ SubVecVT.getVectorMinNumElements() >= 8) {
+ assert(OrigIdx % 8 == 0 && "Invalid index");
+ assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
+ SubVecVT.getVectorMinNumElements() % 8 == 0 &&
+ "Unexpected mask vector lowering");
+ OrigIdx /= 8;
+ SubVecVT =
+ MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
+ SubVecVT.isScalableVector());
+ VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
+ VecVT.isScalableVector());
+ Vec = DAG.getBitcast(VecVT, Vec);
+ } else {
+ // We can't slide this mask vector down, indexed by its i1 elements.
+ // This poses a problem when we wish to extract a scalable vector which
+ // can't be re-expressed as a larger type. Just choose the slow path and
+ // extend to a larger type, then truncate back down.
+ // TODO: We could probably improve this when extracting certain fixed
+ // from fixed, where we can extract as i8 and shift the correct element
+ // right to reach the desired subvector?
+ MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
+ MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
+ Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
+ Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
+ Op.getOperand(1));
+ SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
+ return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
+ }
+ }
+
// If the subvector vector is a fixed-length type, we cannot use subregister
// manipulation to simplify the codegen; we don't know which register of a
// LMUL group contains the specific subvector as we only know the minimum
@@ -2577,8 +2624,12 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
// Now the vector is in the right position, extract our final subvector. This
// should resolve to a COPY.
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
- DAG.getConstant(0, DL, XLenVT));
+ Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
+ DAG.getConstant(0, DL, XLenVT));
+
+ // We might have bitcast from a mask type: cast back to the original type if
+ // required.
+ return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
}
SDValue
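The index and type arithmetic in the new byte-aligned mask branch can be
sketched standalone (SimpleVT below is a toy stand-in for MVT, purely
illustrative): extracting v8i1 at element index 48 from v64i1 is rewritten as
extracting v1i8 at element index 6 from v8i8, which is why the
extract_v8i1_nxv64i1_48 test below slides down by 6 i8 elements.

#include <cassert>

struct SimpleVT { unsigned MinNumElts; unsigned EltBits; };

// Rewrite an i1 extraction as an equivalent i8 extraction when the index and
// both element counts are multiples of 8 (mirrors OrigIdx /= 8 and the
// MinNumElts / 8 type changes above).
void toByteExtract(SimpleVT &VecVT, SimpleVT &SubVecVT, unsigned &Idx) {
  assert(Idx % 8 == 0 && VecVT.MinNumElts % 8 == 0 &&
         SubVecVT.MinNumElts % 8 == 0 && "unexpected mask vector lowering");
  Idx /= 8;                                // index counted in i8 lanes
  VecVT = {VecVT.MinNumElts / 8, 8};       // e.g. v64i1 -> v8i8
  SubVecVT = {SubVecVT.MinNumElts / 8, 8}; // e.g. v8i1  -> v1i8
}

int main() {
  SimpleVT Vec = {64, 1}, Sub = {8, 1};
  unsigned Idx = 48;
  toByteExtract(Vec, Sub, Idx);            // Idx is now 6; both types are i8
  return (Idx == 6 && Vec.MinNumElts == 8 && Sub.MinNumElts == 1) ? 0 : 1;
}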
diff --git a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
index f297afce9d82..3f264792e1ac 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
@@ -358,6 +358,51 @@ define <vscale x 2 x half> @extract_nxv2f16_nxv16f16_4(<vscale x 16 x half> %vec
ret <vscale x 2 x half> %c
}
+define <vscale x 8 x i1> @extract_nxv64i1_nxv8i1_0(<vscale x 64 x i1> %mask) {
+; CHECK-LABEL: extract_nxv64i1_nxv8i1_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %c = call <vscale x 8 x i1> @llvm.experimental.vector.extract.nxv8i1(<vscale x 64 x i1> %mask, i64 0)
+ ret <vscale x 8 x i1> %c
+}
+
+define <vscale x 8 x i1> @extract_nxv64i1_nxv8i1_8(<vscale x 64 x i1> %mask) {
+; CHECK-LABEL: extract_nxv64i1_nxv8i1_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu
+; CHECK-NEXT: vslidedown.vx v0, v0, a0
+; CHECK-NEXT: ret
+ %c = call <vscale x 8 x i1> @llvm.experimental.vector.extract.nxv8i1(<vscale x 64 x i1> %mask, i64 8)
+ ret <vscale x 8 x i1> %c
+}
+
+define <vscale x 2 x i1> @extract_nxv64i1_nxv2i1_0(<vscale x 64 x i1> %mask) {
+; CHECK-LABEL: extract_nxv64i1_nxv2i1_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %c = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1(<vscale x 64 x i1> %mask, i64 0)
+ ret <vscale x 2 x i1> %c
+}
+
+define <vscale x 2 x i1> @extract_nxv64i1_nxv2i1_2(<vscale x 64 x i1> %mask) {
+; CHECK-LABEL: extract_nxv64i1_nxv2i1_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8,m8,ta,mu
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 2
+; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu
+; CHECK-NEXT: vslidedown.vx v25, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu
+; CHECK-NEXT: vmsne.vi v0, v25, 0
+; CHECK-NEXT: ret
+ %c = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1(<vscale x 64 x i1> %mask, i64 2)
+ ret <vscale x 2 x i1> %c
+}
+
declare <vscale x 1 x i8> @llvm.experimental.vector.extract.nxv1i8.nxv8i8(<vscale x 8 x i8> %vec, i64 %idx)
declare <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv32i8(<vscale x 32 x i8> %vec, i64 %idx)
@@ -373,3 +418,6 @@ declare <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<v
declare <vscale x 8 x i32> @llvm.experimental.vector.extract.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, i64 %idx)
declare <vscale x 2 x half> @llvm.experimental.vector.extract.nxv2f16.nxv16f16(<vscale x 16 x half> %vec, i64 %idx)
+
+declare <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1(<vscale x 64 x i1> %vec, i64 %idx)
+declare <vscale x 8 x i1> @llvm.experimental.vector.extract.nxv8i1(<vscale x 64 x i1> %vec, i64 %idx)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
index dadcc130b238..8618598938f7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
@@ -152,6 +152,321 @@ define void @extract_v8i32_nxv16i32_8(<vscale x 16 x i32> %x, <8 x i32>* %y) {
ret void
}
+define void @extract_v8i1_v64i1_0(<64 x i1>* %x, <8 x i1>* %y) {
+; LMULMAX2-LABEL: extract_v8i1_v64i1_0:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a2, zero, 32
+; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT: vle1.v v25, (a0)
+; LMULMAX2-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX2-NEXT: vse1.v v25, (a1)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-LABEL: extract_v8i1_v64i1_0:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a2, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vle1.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT: vse1.v v25, (a1)
+; LMULMAX1-NEXT: ret
+ %a = load <64 x i1>, <64 x i1>* %x
+ %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.v64i1(<64 x i1> %a, i64 0)
+ store <8 x i1> %c, <8 x i1>* %y
+ ret void
+}
+
+define void @extract_v8i1_v64i1_8(<64 x i1>* %x, <8 x i1>* %y) {
+; LMULMAX2-LABEL: extract_v8i1_v64i1_8:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a2, zero, 32
+; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT: vle1.v v25, (a0)
+; LMULMAX2-NEXT: vsetivli a0, 1, e8,m1,ta,mu
+; LMULMAX2-NEXT: vslidedown.vi v25, v25, 1
+; LMULMAX2-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX2-NEXT: vse1.v v25, (a1)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-LABEL: extract_v8i1_v64i1_8:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a2, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vle1.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 1, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 1
+; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT: vse1.v v25, (a1)
+; LMULMAX1-NEXT: ret
+ %a = load <64 x i1>, <64 x i1>* %x
+ %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.v64i1(<64 x i1> %a, i64 8)
+ store <8 x i1> %c, <8 x i1>* %y
+ ret void
+}
+
+define void @extract_v8i1_v64i1_48(<64 x i1>* %x, <8 x i1>* %y) {
+; LMULMAX2-LABEL: extract_v8i1_v64i1_48:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a0, a0, 4
+; LMULMAX2-NEXT: addi a2, zero, 32
+; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT: vle1.v v25, (a0)
+; LMULMAX2-NEXT: vsetivli a0, 1, e8,m1,ta,mu
+; LMULMAX2-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX2-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX2-NEXT: vse1.v v25, (a1)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-LABEL: extract_v8i1_v64i1_48:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: addi a0, a0, 6
+; LMULMAX1-NEXT: vsetivli a2, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vle1.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT: vse1.v v25, (a1)
+; LMULMAX1-NEXT: ret
+ %a = load <64 x i1>, <64 x i1>* %x
+ %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.v64i1(<64 x i1> %a, i64 48)
+ store <8 x i1> %c, <8 x i1>* %y
+ ret void
+}
+
+define void @extract_v8i1_nxv2i1_0(<vscale x 2 x i1> %x, <8 x i1>* %y) {
+; CHECK-LABEL: extract_v8i1_nxv2i1_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vse1.v v0, (a0)
+; CHECK-NEXT: ret
+ %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv2i1(<vscale x 2 x i1> %x, i64 0)
+ store <8 x i1> %c, <8 x i1>* %y
+ ret void
+}
+
+define void @extract_v8i1_nxv2i1_2(<vscale x 2 x i1> %x, <8 x i1>* %y) {
+; CHECK-LABEL: extract_v8i1_nxv2i1_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a1, 8, e8,mf4,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v25, 2
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v26, v25, 0
+; CHECK-NEXT: vse1.v v26, (a0)
+; CHECK-NEXT: ret
+ %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv2i1(<vscale x 2 x i1> %x, i64 2)
+ store <8 x i1> %c, <8 x i1>* %y
+ ret void
+}
+
+define void @extract_v8i1_nxv64i1_0(<vscale x 64 x i1> %x, <8 x i1>* %y) {
+; CHECK-LABEL: extract_v8i1_nxv64i1_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vse1.v v0, (a0)
+; CHECK-NEXT: ret
+ %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv64i1(<vscale x 64 x i1> %x, i64 0)
+ store <8 x i1> %c, <8 x i1>* %y
+ ret void
+}
+
+define void @extract_v8i1_nxv64i1_8(<vscale x 64 x i1> %x, <8 x i1>* %y) {
+; CHECK-LABEL: extract_v8i1_nxv64i1_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 1, e8,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v0, 1
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vse1.v v25, (a0)
+; CHECK-NEXT: ret
+ %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv64i1(<vscale x 64 x i1> %x, i64 8)
+ store <8 x i1> %c, <8 x i1>* %y
+ ret void
+}
+
+define void @extract_v8i1_nxv64i1_48(<vscale x 64 x i1> %x, <8 x i1>* %y) {
+; CHECK-LABEL: extract_v8i1_nxv64i1_48:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 1, e8,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v0, 6
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vse1.v v25, (a0)
+; CHECK-NEXT: ret
+ %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv64i1(<vscale x 64 x i1> %x, i64 48)
+ store <8 x i1> %c, <8 x i1>* %y
+ ret void
+}
+
+
+define void @extract_v2i1_v64i1_0(<64 x i1>* %x, <2 x i1>* %y) {
+; LMULMAX2-LABEL: extract_v2i1_v64i1_0:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a2, zero, 32
+; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT: vle1.v v25, (a0)
+; LMULMAX2-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX2-NEXT: vse1.v v25, (a1)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-LABEL: extract_v2i1_v64i1_0:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a2, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vle1.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX1-NEXT: vse1.v v25, (a1)
+; LMULMAX1-NEXT: ret
+ %a = load <64 x i1>, <64 x i1>* %x
+ %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 0)
+ store <2 x i1> %c, <2 x i1>* %y
+ ret void
+}
+
+define void @extract_v2i1_v64i1_2(<64 x i1>* %x, <2 x i1>* %y) {
+; LMULMAX2-LABEL: extract_v2i1_v64i1_2:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a2, zero, 32
+; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT: vle1.v v0, (a0)
+; LMULMAX2-NEXT: vmv.v.i v26, 0
+; LMULMAX2-NEXT: vmerge.vim v26, v26, 1, v0
+; LMULMAX2-NEXT: vsetivli a0, 2, e8,m2,ta,mu
+; LMULMAX2-NEXT: vslidedown.vi v26, v26, 2
+; LMULMAX2-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX2-NEXT: vmsne.vi v25, v26, 0
+; LMULMAX2-NEXT: vse1.v v25, (a1)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-LABEL: extract_v2i1_v64i1_2:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a2, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vle1.v v0, (a0)
+; LMULMAX1-NEXT: vmv.v.i v25, 0
+; LMULMAX1-NEXT: vmerge.vim v25, v25, 1, v0
+; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vmsne.vi v26, v25, 0
+; LMULMAX1-NEXT: vse1.v v26, (a1)
+; LMULMAX1-NEXT: ret
+ %a = load <64 x i1>, <64 x i1>* %x
+ %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 2)
+ store <2 x i1> %c, <2 x i1>* %y
+ ret void
+}
+
+define void @extract_v2i1_v64i1_42(<64 x i1>* %x, <2 x i1>* %y) {
+; LMULMAX2-LABEL: extract_v2i1_v64i1_42:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a0, a0, 4
+; LMULMAX2-NEXT: addi a2, zero, 32
+; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT: vle1.v v0, (a0)
+; LMULMAX2-NEXT: vmv.v.i v26, 0
+; LMULMAX2-NEXT: vmerge.vim v26, v26, 1, v0
+; LMULMAX2-NEXT: vsetivli a0, 2, e8,m2,ta,mu
+; LMULMAX2-NEXT: vslidedown.vi v26, v26, 10
+; LMULMAX2-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX2-NEXT: vmsne.vi v25, v26, 0
+; LMULMAX2-NEXT: vse1.v v25, (a1)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-LABEL: extract_v2i1_v64i1_42:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: addi a0, a0, 4
+; LMULMAX1-NEXT: vsetivli a2, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vle1.v v0, (a0)
+; LMULMAX1-NEXT: vmv.v.i v25, 0
+; LMULMAX1-NEXT: vmerge.vim v25, v25, 1, v0
+; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 10
+; LMULMAX1-NEXT: vmsne.vi v26, v25, 0
+; LMULMAX1-NEXT: vse1.v v26, (a1)
+; LMULMAX1-NEXT: ret
+ %a = load <64 x i1>, <64 x i1>* %x
+ %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 42)
+ store <2 x i1> %c, <2 x i1>* %y
+ ret void
+}
+
+define void @extract_v2i1_nxv2i1_0(<vscale x 2 x i1> %x, <2 x i1>* %y) {
+; CHECK-LABEL: extract_v2i1_nxv2i1_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
+; CHECK-NEXT: vse1.v v0, (a0)
+; CHECK-NEXT: ret
+ %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1> %x, i64 0)
+ store <2 x i1> %c, <2 x i1>* %y
+ ret void
+}
+
+define void @extract_v2i1_nxv2i1_2(<vscale x 2 x i1> %x, <2 x i1>* %y) {
+; CHECK-LABEL: extract_v2i1_nxv2i1_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a1, 2, e8,mf4,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v25, 2
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v26, v25, 0
+; CHECK-NEXT: vse1.v v26, (a0)
+; CHECK-NEXT: ret
+ %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1> %x, i64 2)
+ store <2 x i1> %c, <2 x i1>* %y
+ ret void
+}
+
+define void @extract_v2i1_nxv64i1_0(<vscale x 64 x i1> %x, <2 x i1>* %y) {
+; CHECK-LABEL: extract_v2i1_nxv64i1_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
+; CHECK-NEXT: vse1.v v0, (a0)
+; CHECK-NEXT: ret
+ %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv64i1(<vscale x 64 x i1> %x, i64 0)
+ store <2 x i1> %c, <2 x i1>* %y
+ ret void
+}
+
+define void @extract_v2i1_nxv64i1_2(<vscale x 64 x i1> %x, <2 x i1>* %y) {
+; CHECK-LABEL: extract_v2i1_nxv64i1_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: vsetivli a1, 2, e8,m8,ta,mu
+; CHECK-NEXT: vslidedown.vi v8, v8, 2
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v8, 0
+; CHECK-NEXT: vse1.v v25, (a0)
+; CHECK-NEXT: ret
+ %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv64i1(<vscale x 64 x i1> %x, i64 2)
+ store <2 x i1> %c, <2 x i1>* %y
+ ret void
+}
+
+define void @extract_v2i1_nxv64i1_42(<vscale x 64 x i1> %x, <2 x i1>* %y) {
+; CHECK-LABEL: extract_v2i1_nxv64i1_42:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: addi a1, zero, 42
+; CHECK-NEXT: vsetivli a2, 2, e8,m8,ta,mu
+; CHECK-NEXT: vslidedown.vx v8, v8, a1
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v8, 0
+; CHECK-NEXT: vse1.v v25, (a0)
+; CHECK-NEXT: ret
+ %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv64i1(<vscale x 64 x i1> %x, i64 42)
+ store <2 x i1> %c, <2 x i1>* %y
+ ret void
+}
+
+declare <2 x i1> @llvm.experimental.vector.extract.v2i1.v64i1(<64 x i1> %vec, i64 %idx)
+declare <8 x i1> @llvm.experimental.vector.extract.v8i1.v64i1(<64 x i1> %vec, i64 %idx)
+
+declare <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1> %vec, i64 %idx)
+declare <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv2i1(<vscale x 2 x i1> %vec, i64 %idx)
+
+declare <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv64i1(<vscale x 64 x i1> %vec, i64 %idx)
+declare <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv64i1(<vscale x 64 x i1> %vec, i64 %idx)
+
declare <2 x i8> @llvm.experimental.vector.extract.v2i8.v8i8(<8 x i8> %vec, i64 %idx)
declare <2 x i32> @llvm.experimental.vector.extract.v2i32.v8i32(<8 x i32> %vec, i64 %idx)