[llvm] [RISCV][CodeGen] Add CodeGen support of Zvzip extension (PR #189801)
Boyao Wang via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 3 00:58:20 PDT 2026
https://github.com/BoyaoWang430 updated https://github.com/llvm/llvm-project/pull/189801
From 799ebf33ee338e797a50bea1b99a4add10af45f7 Mon Sep 17 00:00:00 2001
From: wangboyao <wangboyao at bytedance.com>
Date: Wed, 1 Apr 2026 14:10:06 +0800
Subject: [PATCH 1/4] [RISCV][CodeGen] Add CodeGen support of Zvzip extension
This adds initial CodeGen support for the Zvzip standard extension.
Co-Authored-By: wangpengcheng <wangpengcheng.pp at bytedance.com>
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 192 +-
llvm/lib/Target/RISCV/RISCVISelLowering.h | 5 +
llvm/lib/Target/RISCV/RISCVInstrInfoZvzip.td | 113 +
.../fixed-vectors-shuffle-deinterleave2.ll | 393 ++++
.../fixed-vectors-shuffle-int-interleave.ll | 282 +++
.../fixed-vectors-shuffle-zipeven-zipodd.ll | 139 ++
.../RISCV/rvv/vector-deinterleave-fixed.ll | 46 +
.../CodeGen/RISCV/rvv/vector-deinterleave.ll | 276 +++
.../RISCV/rvv/vector-interleave-fixed.ll | 510 +++++
.../CodeGen/RISCV/rvv/vector-interleave.ll | 1868 +++++++++++++++++
10 files changed, 3805 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6a77797918b09..f38f193d5ffcc 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5207,19 +5207,33 @@ static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2) {
return SDValue();
}
+bool RISCVTargetLowering::isLegalVTForZvzip(MVT VT) const {
+ MVT ContainerVT = VT;
+ if (VT.isFixedLengthVector())
+ ContainerVT = getContainerForFixedLengthVector(VT);
+  // The zip/unzip results use twice this type's LMUL, so reject LMUL_8.
+ if (llvm::RISCVTargetLowering::getLMUL(ContainerVT) == RISCVVType::LMUL_8)
+ return false;
+ return true;
+}
+
/// Is this shuffle interleaving contiguous elements from one vector into the
/// even elements and contiguous elements from another vector into the odd
/// elements. \p EvenSrc will contain the element that should be in the first
/// even element. \p OddSrc will contain the element that should be in the first
/// odd element. These can be the first element in a source or the element half
/// way through the source.
-static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
- int &OddSrc, const RISCVSubtarget &Subtarget) {
+bool RISCVTargetLowering::isInterleaveShuffle(
+ ArrayRef<int> Mask, MVT VT, int &EvenSrc, int &OddSrc,
+ const RISCVSubtarget &Subtarget) const {
// We need to be able to widen elements to the next larger integer type or
// use the zip2a instruction at e64.
- if (VT.getScalarSizeInBits() >= Subtarget.getELen() &&
- !Subtarget.hasVendorXRivosVizip())
- return false;
+ if (VT.getScalarSizeInBits() >= Subtarget.getELen()) {
+ if (!Subtarget.hasVendorXRivosVizip() && !Subtarget.hasStdExtZvzip())
+ return false;
+ if (Subtarget.hasStdExtZvzip() && !isLegalVTForZvzip(VT))
+ return false;
+ }
int Size = Mask.size();
int NumElts = VT.getVectorNumElements();
@@ -5674,7 +5688,8 @@ static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1,
const RISCVSubtarget &Subtarget) {
assert(RISCVISD::RI_VZIPEVEN_VL == Opc || RISCVISD::RI_VZIPODD_VL == Opc ||
RISCVISD::RI_VZIP2A_VL == Opc || RISCVISD::RI_VZIP2B_VL == Opc ||
- RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc);
+ RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc ||
+ RISCVISD::VPAIRE_VL == Opc || RISCVISD::VPAIRO_VL == Opc);
assert(Op0.getSimpleValueType() == Op1.getSimpleValueType());
MVT VT = Op0.getSimpleValueType();
@@ -5713,6 +5728,64 @@ static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1,
return Res;
}
+static SDValue lowerZvzipVZIP(SDValue Op0, SDValue Op1, const SDLoc &DL,
+ SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget, unsigned Part) {
+ assert(Op0.getSimpleValueType() == Op1.getSimpleValueType());
+ MVT VT = Op0.getSimpleValueType();
+ MVT IntVT = VT.changeVectorElementTypeToInteger();
+ Op0 = DAG.getBitcast(IntVT, Op0);
+ Op1 = DAG.getBitcast(IntVT, Op1);
+ MVT ContainerVT = IntVT;
+ if (VT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(DAG, IntVT, Subtarget);
+ Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
+ Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
+ }
+ MVT ResVT = ContainerVT.getDoubleNumVectorElementsVT();
+ auto [Mask, VL] = getDefaultVLOps(IntVT, ContainerVT, DL, DAG, Subtarget);
+ SDValue Passthru = DAG.getUNDEF(ResVT);
+ SDValue Res =
+ DAG.getNode(RISCVISD::VZIP_VL, DL, ResVT, Op0, Op1, Passthru, Mask, VL);
+ Res = DAG.getExtractSubvector(
+ DL, ContainerVT, Res,
+ Part * ContainerVT.getVectorElementCount().getKnownMinValue());
+ if (IntVT.isFixedLengthVector())
+ Res = convertFromScalableVector(IntVT, Res, DAG, Subtarget);
+ Res = DAG.getBitcast(VT, Res);
+ return Res;
+}
+static SDValue lowerZvzipVUNZIP(unsigned Opc, SDValue Op, const SDLoc &DL,
+ SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ assert(RISCVISD::VUNZIPE_VL == Opc || RISCVISD::VUNZIPO_VL == Opc);
+ MVT VT = Op.getSimpleValueType();
+ MVT IntVT = VT.changeVectorElementTypeToInteger();
+ Op = DAG.getBitcast(IntVT, Op);
+ MVT ContainerVT = IntVT;
+ if (VT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(DAG, IntVT, Subtarget);
+ Op = convertToScalableVector(ContainerVT, Op, DAG, Subtarget);
+ }
+  // An e64 source at LMUL_1 has no legal half-size result type; bail out.
+ if (ContainerVT.getVectorElementType() == MVT::i64 &&
+ RISCVTargetLowering::getLMUL(ContainerVT) == RISCVVType::LMUL_1)
+ return SDValue();
+ MVT ResVT = ContainerVT.getHalfNumVectorElementsVT();
+ MVT HalfVT = VT.getHalfNumVectorElementsVT();
+ MVT HalfIntVT = IntVT.getHalfNumVectorElementsVT();
+ auto [Mask, VL] = getDefaultVLOps(ResVT, ResVT, DL, DAG, Subtarget);
+ if (VT.isFixedLengthVector())
+ VL = DAG.getConstant(VT.getVectorNumElements() / 2, DL,
+ Subtarget.getXLenVT());
+ SDValue Passthru = DAG.getUNDEF(ResVT);
+ SDValue Res = DAG.getNode(Opc, DL, ResVT, Op, Passthru, Mask, VL);
+ if (HalfIntVT.isFixedLengthVector())
+ Res = convertFromScalableVector(HalfIntVT, Res, DAG, Subtarget);
+ Res = DAG.getBitcast(HalfVT, Res);
+ return Res;
+}
+
// Given a vector a, b, c, d return a vector Factor times longer
// with Factor-1 undef's between elements. Ex:
// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
@@ -6212,8 +6285,8 @@ static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
return DAG.getBitcast(VT, DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
}
-static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
+SDValue RISCVTargetLowering::lowerVECTOR_SHUFFLE(
+ SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) const {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
SDLoc DL(Op);
@@ -6240,7 +6313,7 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
ISD::SETNE);
}
- MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+ MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
// Store the return value in a single variable instead of structured bindings
// so that we can pass it to GetSlide below, which cannot capture structured
@@ -6402,12 +6475,49 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
}
}
- // If this is a deinterleave(2), try using vunzip{a,b}. This mostly catches
- // e64 which can't match above.
+ // If this is a deinterleave(2), try using vunzip{a,b} or vunzip{e,o}. This
+ // mostly catches e64 which can't match above.
unsigned Index = 0;
- if (Subtarget.hasVendorXRivosVizip() &&
+ if ((Subtarget.hasVendorXRivosVizip() || Subtarget.hasStdExtZvzip()) &&
ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, 2, Index) &&
1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
+ if (Subtarget.hasStdExtZvzip() && isLegalVTForZvzip(VT)) {
+ unsigned Opc = Index == 0 ? RISCVISD::VUNZIPE_VL : RISCVISD::VUNZIPO_VL;
+ EVT NewVT = VT.getDoubleNumVectorElementsVT();
+ SDValue Op;
+ if (V2.isUndef()) {
+ Op = DAG.getNode(ISD::CONCAT_VECTORS, DL, NewVT, V1, V2);
+ }
+ if (auto VLEN = Subtarget.getRealVLen();
+ VLEN && VT.getSizeInBits().getKnownMinValue() % *VLEN == 0)
+ if (SDValue V = lowerZvzipVUNZIP(
+ Opc, DAG.getNode(ISD::CONCAT_VECTORS, DL, NewVT, V1, V2), DL,
+ DAG, Subtarget))
+ return V;
+ if (SDValue Src = foldConcatVector(V1, V2))
+ Op = DAG.getExtractSubvector(DL, NewVT, Src, 0);
+ if (Op) {
+ if (SDValue Res = lowerZvzipVUNZIP(Opc, Op, DL, DAG, Subtarget))
+ return DAG.getExtractSubvector(DL, VT, Res, 0);
+ }
+
+      // Otherwise, deinterleave each source separately and concatenate the
+      // two half-width results.
+ if (1 < count_if(Mask,
+ [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
+ 1 < count_if(Mask,
+ [&Mask](int Idx) { return Idx >= (int)Mask.size(); })) {
+ SDValue Lo = lowerZvzipVUNZIP(Opc, V1, DL, DAG, Subtarget);
+ SDValue Hi = lowerZvzipVUNZIP(Opc, V2, DL, DAG, Subtarget);
+ if (Lo && Hi)
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
+ }
+ goto fallback;
+ }
+
+ if (!Subtarget.hasVendorXRivosVizip())
+ goto fallback;
+
unsigned Opc =
Index == 0 ? RISCVISD::RI_VUNZIP2A_VL : RISCVISD::RI_VUNZIP2B_VL;
if (V2.isUndef())
@@ -6449,6 +6559,7 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
}
}
+fallback:
if (SDValue V =
lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
return V;
@@ -6487,8 +6598,15 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
OddV = DAG.getExtractSubvector(DL, HalfVT, OddV, OddSrc % Size);
}
- // Prefer vzip2a if available.
- // TODO: Extend to matching zip2b if EvenSrc and OddSrc allow.
+ // Prefer vzip2a or vzip if available.
+ // TODO: Extend to matching zip2b or vzip if EvenSrc and OddSrc allow.
+ if (Subtarget.hasStdExtZvzip() && isLegalVTForZvzip(VT)) {
+ EvenV = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
+ EvenV, DAG.getVectorIdxConstant(0, DL));
+ OddV = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), OddV,
+ DAG.getVectorIdxConstant(0, DL));
+ return lowerZvzipVZIP(EvenV, OddV, DL, DAG, Subtarget, /*Part=*/0);
+ }
if (Subtarget.hasVendorXRivosVizip()) {
EvenV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), EvenV, 0);
OddV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), OddV, 0);
@@ -6545,15 +6663,17 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
return convertFromScalableVector(VT, Res, DAG, Subtarget);
}
- if (Subtarget.hasVendorXRivosVizip()) {
+ if (Subtarget.hasVendorXRivosVizip() || Subtarget.hasStdExtZvzip()) {
bool TryWiden = false;
unsigned Factor;
if (isZipEven(SrcInfo, Mask, Factor)) {
if (Factor == 1) {
SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2;
SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2;
- return lowerVZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG,
- Subtarget);
+ unsigned int Opc = Subtarget.hasStdExtZvzip()
+ ? RISCVISD::VPAIRE_VL
+ : RISCVISD::RI_VZIPEVEN_VL;
+ return lowerVZIP(Opc, Src1, Src2, DL, DAG, Subtarget);
}
TryWiden = true;
}
@@ -6561,8 +6681,10 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
if (Factor == 1) {
SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2;
SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2;
- return lowerVZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG,
- Subtarget);
+ unsigned int Opc = Subtarget.hasStdExtZvzip()
+ ? RISCVISD::VPAIRO_VL
+ : RISCVISD::RI_VZIPODD_VL;
+ return lowerVZIP(Opc, Src1, Src2, DL, DAG, Subtarget);
}
TryWiden = true;
}
@@ -12995,6 +13117,25 @@ SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
return DAG.getMergeValues(Res, DL);
}
+ if (Subtarget.hasStdExtZvzip() && Factor == 2) {
+ MVT VT = Op->getSimpleValueType(0);
+ if (isLegalVTForZvzip(VT)) {
+ SDValue V1 = Op->getOperand(0);
+ SDValue V2 = Op->getOperand(1);
+ // Freeze the sources so we can increase their use count.
+ V1 = DAG.getFreeze(V1);
+ V2 = DAG.getFreeze(V2);
+ EVT NewVT = VT.getDoubleNumVectorElementsVT();
+ SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, NewVT, V1, V2);
+ SDValue Even =
+ lowerZvzipVUNZIP(RISCVISD::VUNZIPE_VL, V, DL, DAG, Subtarget);
+ SDValue Odd =
+ lowerZvzipVUNZIP(RISCVISD::VUNZIPO_VL, V, DL, DAG, Subtarget);
+ if (Even && Odd)
+ return DAG.getMergeValues({Even, Odd}, DL);
+ }
+ }
+
if (Subtarget.hasVendorXRivosVizip() && Factor == 2) {
MVT VT = Op->getSimpleValueType(0);
SDValue V1 = Op->getOperand(0);
@@ -13256,6 +13397,19 @@ SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
return DAG.getMergeValues(Loads, DL);
}
+ if (Subtarget.hasStdExtZvzip() && !Op.getOperand(0).isUndef() &&
+ !Op.getOperand(1).isUndef()) {
+ MVT VT = Op->getSimpleValueType(0);
+ if (isLegalVTForZvzip(VT)) {
+ // Freeze the sources so we can increase their use count.
+ SDValue V1 = DAG.getFreeze(Op->getOperand(0));
+ SDValue V2 = DAG.getFreeze(Op->getOperand(1));
+ SDValue Lo = lowerZvzipVZIP(V1, V2, DL, DAG, Subtarget, /*Part=*/0);
+ SDValue Hi = lowerZvzipVZIP(V1, V2, DL, DAG, Subtarget, /*Part=*/1);
+ return DAG.getMergeValues({Lo, Hi}, DL);
+ }
+ }
+
// Use ri.vzip2{a,b} if available
// TODO: Figure out the best lowering for the spread variants
if (Subtarget.hasVendorXRivosVizip() && !Op.getOperand(0).isUndef() &&
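
The lowerVECTOR_DEINTERLEAVE and lowerVECTOR_INTERLEAVE changes above also
cover the node-level forms, such as those produced by the
llvm.vector.deinterleave2 / llvm.vector.interleave2 intrinsics: deinterleave2
is lowered by concatenating the two operands and emitting
VUNZIPE_VL/VUNZIPO_VL on the double-width type, and interleave2 by emitting
VZIP_VL and taking the low and high halves of its double-width result as the
two outputs. A sketch of IR that exercises these paths (illustrative only;
the expected machine code is in the vector-interleave*.ll and
vector-deinterleave*.ll test updates listed in the diffstat):

  declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64>)
  declare <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

  define { <vscale x 2 x i64>, <vscale x 2 x i64> } @deinterleave2_example(<vscale x 4 x i64> %v) {
    ; Lowered via VUNZIPE_VL/VUNZIPO_VL on the concatenated input.
    %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %v)
    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
  }

  define <vscale x 4 x i64> @interleave2_example(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
    ; Lowered via VZIP_VL; the low/high halves form the interleaved result.
    %res = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
    ret <vscale x 4 x i64> %res
  }
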
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 8d88aeb7ae3fc..1f5cc09abc11e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -583,6 +583,11 @@ class RISCVTargetLowering : public TargetLowering {
SDValue lowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerPARTIAL_REDUCE_MLA(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) const;
+ bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
+ int &OddSrc, const RISCVSubtarget &Subtarget) const;
+ bool isLegalVTForZvzip(MVT VT) const;
SDValue lowerXAndesBfHCvtBFloat16Load(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerXAndesBfHCvtBFloat16Store(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvzip.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvzip.td
index 9fd88ee0ef104..ab507f54c3b90 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvzip.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvzip.td
@@ -29,3 +29,116 @@ let Predicates = [HasStdExtZvzip], Constraints = "@earlyclobber $vd" in {
def VPAIRO_VV : VALUVV<0b001111, OPMVV, "vpairo.vv">;
}
} // Predicates = [HasStdExtZvzip]
+
+def SDT_RISCVZip_VL : SDTypeProfile<1, 5, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisSubVecOfVec<1, 0>,
+ SDTCisSameAs<1, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCVecEltisVT<4, i1>,
+ SDTCisSameNumEltsAs<1, 4>,
+ SDTCisVT<5, XLenVT>]>;
+def SDT_RISCVUnzip_VL : SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisSubVecOfVec<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCVecEltisVT<3, i1>,
+ SDTCisSameNumEltsAs<1, 3>,
+ SDTCisVT<4, XLenVT>]>;
+// These are modeled after the int binop VL nodes
+let HasPassthruOp = true, HasMaskOp = true in {
+def vzip_vl : RVSDNode<"VZIP_VL", SDT_RISCVZip_VL>;
+def vunzipe_vl : RVSDNode<"VUNZIPE_VL", SDT_RISCVUnzip_VL>;
+def vunzipo_vl : RVSDNode<"VUNZIPO_VL", SDT_RISCVUnzip_VL>;
+def vpaire_vl : RVSDNode<"VPAIRE_VL", SDT_RISCVIntBinOp_VL>;
+def vpairo_vl : RVSDNode<"VPAIRO_VL", SDT_RISCVIntBinOp_VL>;
+} // HasPassthruOp = true, HasMaskOp = true
+multiclass VPseudoVZIP {
+ foreach m = MxListW in
+ defm "" : VPseudoBinaryW_VV<m, Commutable=0>;
+}
+multiclass VPseudoVUNZIP {
+ foreach m = MxListW in {
+ defvar mx = m.MX;
+ let VLMul = m.value in {
+ def "_V_" # mx : VPseudoUnaryNoMask<m.vrclass, m.wvrclass>,
+ SchedUnary<"WriteVIALUV", "ReadVIALUV", mx, forcePassthruRead=true>;
+ def "_V_" # mx # "_MASK" :
+ VPseudoUnaryMask<m.vrclass, m.wvrclass>,
+ RISCVMaskedPseudo<MaskIdx=2>,
+ SchedUnary<"WriteVIALUV", "ReadVIALUV", mx, forcePassthruRead=true>;
+ }
+ }
+}
+let Predicates = [HasStdExtZvzip],
+ Constraints = "@earlyclobber $rd, $rd = $passthru" in {
+defm PseudoVZIP : VPseudoVZIP;
+defm PseudoVUNZIPE : VPseudoVUNZIP;
+defm PseudoVUNZIPO : VPseudoVUNZIP;
+defm PseudoVPAIRE : VPseudoVALU_VV;
+defm PseudoVPAIRO : VPseudoVALU_VV;
+}
+defset list<VTypeInfoToWide> AllZvzipVectors = {
+ def : VTypeInfoToWide<VI8MF8, VI8MF4>;
+ def : VTypeInfoToWide<VI8MF4, VI8MF2>;
+ def : VTypeInfoToWide<VI8MF2, VI8M1>;
+ def : VTypeInfoToWide<VI8M1, VI8M2>;
+ def : VTypeInfoToWide<VI8M2, VI8M4>;
+ def : VTypeInfoToWide<VI8M4, VI8M8>;
+ def : VTypeInfoToWide<VI16MF4, VI16MF2>;
+ def : VTypeInfoToWide<VI16MF2, VI16M1>;
+ def : VTypeInfoToWide<VI16M1, VI16M2>;
+ def : VTypeInfoToWide<VI16M2, VI16M4>;
+ def : VTypeInfoToWide<VI16M4, VI16M8>;
+ def : VTypeInfoToWide<VI32MF2, VI32M1>;
+ def : VTypeInfoToWide<VI32M1, VI32M2>;
+ def : VTypeInfoToWide<VI32M2, VI32M4>;
+ def : VTypeInfoToWide<VI32M4, VI32M8>;
+ def : VTypeInfoToWide<VI64M1, VI64M2>;
+ def : VTypeInfoToWide<VI64M2, VI64M4>;
+ def : VTypeInfoToWide<VI64M4, VI64M8>;
+}
+multiclass VPatVZIP<SDPatternOperator vop, string instruction_name> {
+ foreach VtiToWti = AllZvzipVectors in {
+ defvar vti = VtiToWti.Vti;
+ defvar wti = VtiToWti.Wti;
+ let Predicates = !listconcat([HasStdExtZvzip], GetVTypePredicates<vti>.Predicates) in {
+ def : Pat<(wti.Vector (vop
+ (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$rs2),
+ (wti.Vector wti.RegClass:$passthru),
+ (vti.Mask VMV0:$vm),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX#"_MASK")
+ wti.RegClass:$passthru,
+ vti.RegClass:$rs1,
+ vti.RegClass:$rs2,
+ (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
+ }
+}
+multiclass VPatVUNZIP<SDPatternOperator op, string instruction_name> {
+ foreach VtiToWti = AllZvzipVectors in {
+ defvar vti = VtiToWti.Vti;
+ defvar wti = VtiToWti.Wti;
+ let Predicates = !listconcat([HasStdExtZvzip],
+ GetVTypePredicates<vti>.Predicates) in {
+ def : Pat<(vti.Vector (op (wti.Vector wti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$passthru),
+ (wti.Mask VMV0:$vm),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_V_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$passthru,
+ wti.RegClass:$rs1,
+ (wti.Mask VMV0:$vm),
+ GPR:$vl,
+ vti.Log2SEW,
+ TAIL_AGNOSTIC)>;
+ }
+ }
+}
+defm : VPatVZIP<vzip_vl, "PseudoVZIP">;
+defm : VPatVUNZIP<vunzipe_vl, "PseudoVUNZIPE">;
+defm : VPatVUNZIP<vunzipo_vl, "PseudoVUNZIPO">;
+defm : VPatBinaryVL_VV<vpaire_vl, "PseudoVPAIRE">;
+defm : VPatBinaryVL_VV<vpairo_vl, "PseudoVPAIRO">;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll
index 3a14f87c3f18a..c31c283460c61 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll
@@ -8,6 +8,9 @@
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh,+zvl256b,+experimental-xrivosvizip \
; RUN: -lower-interleaved-accesses=false -verify-machineinstrs \
; RUN: | FileCheck %s --check-prefixes=CHECK,ZIP
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh,+zvl256b,+experimental-zvzip \
+; RUN: -lower-interleaved-accesses=false -verify-machineinstrs \
+; RUN: | FileCheck %s --check-prefixes=CHECK,ZVZIP
define void @vnsrl_0_i8(ptr %in, ptr %out) {
; CHECK-LABEL: vnsrl_0_i8:
@@ -68,6 +71,15 @@ define void @vnsrl_0_i16(ptr %in, ptr %out) {
; ZIP-NEXT: vnsrl.wi v8, v8, 0
; ZIP-NEXT: vse16.v v8, (a1)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_0_i16:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVZIP-NEXT: vle16.v v8, (a0)
+; ZVZIP-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVZIP-NEXT: vnsrl.wi v8, v8, 0
+; ZVZIP-NEXT: vse16.v v8, (a1)
+; ZVZIP-NEXT: ret
entry:
%0 = load <8 x i16>, ptr %in, align 2
%shuffle.i5 = shufflevector <8 x i16> %0, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -102,6 +114,15 @@ define void @vnsrl_16_i16(ptr %in, ptr %out) {
; ZIP-NEXT: vnsrl.wi v8, v8, 16
; ZIP-NEXT: vse16.v v8, (a1)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_16_i16:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVZIP-NEXT: vle16.v v8, (a0)
+; ZVZIP-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVZIP-NEXT: vnsrl.wi v8, v8, 16
+; ZVZIP-NEXT: vse16.v v8, (a1)
+; ZVZIP-NEXT: ret
entry:
%0 = load <8 x i16>, ptr %in, align 2
%shuffle.i5 = shufflevector <8 x i16> %0, <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -136,6 +157,15 @@ define void @vnsrl_0_half(ptr %in, ptr %out) {
; ZIP-NEXT: vnsrl.wi v8, v8, 0
; ZIP-NEXT: vse16.v v8, (a1)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_0_half:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVZIP-NEXT: vle16.v v8, (a0)
+; ZVZIP-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVZIP-NEXT: vnsrl.wi v8, v8, 0
+; ZVZIP-NEXT: vse16.v v8, (a1)
+; ZVZIP-NEXT: ret
entry:
%0 = load <8 x half>, ptr %in, align 2
%shuffle.i5 = shufflevector <8 x half> %0, <8 x half> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -170,6 +200,15 @@ define void @vnsrl_16_half(ptr %in, ptr %out) {
; ZIP-NEXT: vnsrl.wi v8, v8, 16
; ZIP-NEXT: vse16.v v8, (a1)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_16_half:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVZIP-NEXT: vle16.v v8, (a0)
+; ZVZIP-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVZIP-NEXT: vnsrl.wi v8, v8, 16
+; ZVZIP-NEXT: vse16.v v8, (a1)
+; ZVZIP-NEXT: ret
entry:
%0 = load <8 x half>, ptr %in, align 2
%shuffle.i5 = shufflevector <8 x half> %0, <8 x half> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -205,6 +244,15 @@ define void @vnsrl_0_i32(ptr %in, ptr %out) {
; ZIP-NEXT: vnsrl.wi v8, v8, 0
; ZIP-NEXT: vse32.v v8, (a1)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_0_i32:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; ZVZIP-NEXT: vle32.v v8, (a0)
+; ZVZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; ZVZIP-NEXT: vnsrl.wi v8, v8, 0
+; ZVZIP-NEXT: vse32.v v8, (a1)
+; ZVZIP-NEXT: ret
entry:
%0 = load <4 x i32>, ptr %in, align 4
%shuffle.i5 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
@@ -243,6 +291,16 @@ define void @vnsrl_32_i32(ptr %in, ptr %out) {
; ZIP-NEXT: vnsrl.wx v8, v8, a0
; ZIP-NEXT: vse32.v v8, (a1)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_32_i32:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; ZVZIP-NEXT: vle32.v v8, (a0)
+; ZVZIP-NEXT: li a0, 32
+; ZVZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; ZVZIP-NEXT: vnsrl.wx v8, v8, a0
+; ZVZIP-NEXT: vse32.v v8, (a1)
+; ZVZIP-NEXT: ret
entry:
%0 = load <4 x i32>, ptr %in, align 4
%shuffle.i5 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
@@ -278,6 +336,15 @@ define void @vnsrl_0_float(ptr %in, ptr %out) {
; ZIP-NEXT: vnsrl.wi v8, v8, 0
; ZIP-NEXT: vse32.v v8, (a1)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_0_float:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; ZVZIP-NEXT: vle32.v v8, (a0)
+; ZVZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; ZVZIP-NEXT: vnsrl.wi v8, v8, 0
+; ZVZIP-NEXT: vse32.v v8, (a1)
+; ZVZIP-NEXT: ret
entry:
%0 = load <4 x float>, ptr %in, align 4
%shuffle.i5 = shufflevector <4 x float> %0, <4 x float> poison, <2 x i32> <i32 0, i32 2>
@@ -316,6 +383,16 @@ define void @vnsrl_32_float(ptr %in, ptr %out) {
; ZIP-NEXT: vnsrl.wx v8, v8, a0
; ZIP-NEXT: vse32.v v8, (a1)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_32_float:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; ZVZIP-NEXT: vle32.v v8, (a0)
+; ZVZIP-NEXT: li a0, 32
+; ZVZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; ZVZIP-NEXT: vnsrl.wx v8, v8, a0
+; ZVZIP-NEXT: vse32.v v8, (a1)
+; ZVZIP-NEXT: ret
entry:
%0 = load <4 x float>, ptr %in, align 4
%shuffle.i5 = shufflevector <4 x float> %0, <4 x float> poison, <2 x i32> <i32 1, i32 3>
@@ -350,6 +427,16 @@ define void @vnsrl_0_i64(ptr %in, ptr %out) {
; ZIP-NEXT: ri.vunzip2a.vv v10, v8, v9
; ZIP-NEXT: vse64.v v10, (a1)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_0_i64:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
+; ZVZIP-NEXT: vle64.v v8, (a0)
+; ZVZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZVZIP-NEXT: vslidedown.vi v9, v8, 2
+; ZVZIP-NEXT: vslideup.vi v8, v9, 1
+; ZVZIP-NEXT: vse64.v v8, (a1)
+; ZVZIP-NEXT: ret
entry:
%0 = load <4 x i64>, ptr %in, align 8
%shuffle.i5 = shufflevector <4 x i64> %0, <4 x i64> poison, <2 x i32> <i32 0, i32 2>
@@ -385,6 +472,16 @@ define void @vnsrl_64_i64(ptr %in, ptr %out) {
; ZIP-NEXT: ri.vunzip2b.vv v10, v8, v9
; ZIP-NEXT: vse64.v v10, (a1)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_64_i64:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
+; ZVZIP-NEXT: vle64.v v8, (a0)
+; ZVZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZVZIP-NEXT: vslidedown.vi v9, v8, 2
+; ZVZIP-NEXT: vpairo.vv v10, v8, v9
+; ZVZIP-NEXT: vse64.v v10, (a1)
+; ZVZIP-NEXT: ret
entry:
%0 = load <4 x i64>, ptr %in, align 8
%shuffle.i5 = shufflevector <4 x i64> %0, <4 x i64> poison, <2 x i32> <i32 1, i32 3>
@@ -419,6 +516,16 @@ define void @vnsrl_0_double(ptr %in, ptr %out) {
; ZIP-NEXT: ri.vunzip2a.vv v10, v8, v9
; ZIP-NEXT: vse64.v v10, (a1)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_0_double:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
+; ZVZIP-NEXT: vle64.v v8, (a0)
+; ZVZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZVZIP-NEXT: vslidedown.vi v9, v8, 2
+; ZVZIP-NEXT: vslideup.vi v8, v9, 1
+; ZVZIP-NEXT: vse64.v v8, (a1)
+; ZVZIP-NEXT: ret
entry:
%0 = load <4 x double>, ptr %in, align 8
%shuffle.i5 = shufflevector <4 x double> %0, <4 x double> poison, <2 x i32> <i32 0, i32 2>
@@ -454,6 +561,16 @@ define void @vnsrl_64_double(ptr %in, ptr %out) {
; ZIP-NEXT: ri.vunzip2b.vv v10, v8, v9
; ZIP-NEXT: vse64.v v10, (a1)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_64_double:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
+; ZVZIP-NEXT: vle64.v v8, (a0)
+; ZVZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZVZIP-NEXT: vslidedown.vi v9, v8, 2
+; ZVZIP-NEXT: vpairo.vv v10, v8, v9
+; ZVZIP-NEXT: vse64.v v10, (a1)
+; ZVZIP-NEXT: ret
entry:
%0 = load <4 x double>, ptr %in, align 8
%shuffle.i5 = shufflevector <4 x double> %0, <4 x double> poison, <2 x i32> <i32 1, i32 3>
@@ -566,6 +683,15 @@ define void @vnsrl_0_i8_single_src(ptr %in, ptr %out) {
; ZIP-NEXT: vnsrl.wi v8, v8, 0
; ZIP-NEXT: vse8.v v8, (a1)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_0_i8_single_src:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
+; ZVZIP-NEXT: vle8.v v8, (a0)
+; ZVZIP-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
+; ZVZIP-NEXT: vnsrl.wi v8, v8, 0
+; ZVZIP-NEXT: vse8.v v8, (a1)
+; ZVZIP-NEXT: ret
entry:
%0 = load <8 x i8>, ptr %in, align 1
%shuffle.i5 = shufflevector <8 x i8> %0, <8 x i8> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -600,6 +726,15 @@ define void @vnsrl_8_i8_single_src(ptr %in, ptr %out) {
; ZIP-NEXT: vnsrl.wi v8, v8, 8
; ZIP-NEXT: vse8.v v8, (a1)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_8_i8_single_src:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
+; ZVZIP-NEXT: vle8.v v8, (a0)
+; ZVZIP-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
+; ZVZIP-NEXT: vnsrl.wi v8, v8, 8
+; ZVZIP-NEXT: vse8.v v8, (a1)
+; ZVZIP-NEXT: ret
entry:
%0 = load <8 x i8>, ptr %in, align 1
%shuffle.i5 = shufflevector <8 x i8> %0, <8 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -637,6 +772,16 @@ define void @vnsrl_0_i8_single_wideuse(ptr %in, ptr %out) {
; ZIP-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
; ZIP-NEXT: vse8.v v8, (a1)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_0_i8_single_wideuse:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
+; ZVZIP-NEXT: vle8.v v8, (a0)
+; ZVZIP-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
+; ZVZIP-NEXT: vnsrl.wi v8, v8, 0
+; ZVZIP-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
+; ZVZIP-NEXT: vse8.v v8, (a1)
+; ZVZIP-NEXT: ret
entry:
%0 = load <8 x i8>, ptr %in, align 1
%shuffle.i5 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
@@ -684,6 +829,18 @@ define void @vnsrl_0_i32_single_src_m8(ptr %in, ptr %out) {
; ZIP-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; ZIP-NEXT: vse32.v v16, (a1)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_0_i32_single_src_m8:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: li a2, 64
+; ZVZIP-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; ZVZIP-NEXT: vle32.v v8, (a0)
+; ZVZIP-NEXT: li a0, 32
+; ZVZIP-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; ZVZIP-NEXT: vnsrl.wi v16, v8, 0
+; ZVZIP-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; ZVZIP-NEXT: vse32.v v16, (a1)
+; ZVZIP-NEXT: ret
entry:
%0 = load <64 x i32>, ptr %in, align 4
%shuffle.i5 = shufflevector <64 x i32> %0, <64 x i32> poison, <64 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
@@ -729,6 +886,18 @@ define void @vnsrl_0_i32_single_src_m8_2(ptr %in, ptr %out) {
; ZIP-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; ZIP-NEXT: vse32.v v16, (a1)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_0_i32_single_src_m8_2:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: li a2, 64
+; ZVZIP-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; ZVZIP-NEXT: vle32.v v8, (a0)
+; ZVZIP-NEXT: li a0, 32
+; ZVZIP-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; ZVZIP-NEXT: vnsrl.wi v16, v8, 0
+; ZVZIP-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; ZVZIP-NEXT: vse32.v v16, (a1)
+; ZVZIP-NEXT: ret
entry:
%0 = load <64 x i32>, ptr %in, align 4
%shuffle.i5 = shufflevector <64 x i32> %0, <64 x i32> poison, <64 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
@@ -775,6 +944,19 @@ define void @vnsrl_0_i8_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZIP-NEXT: vslideup.vi v9, v8, 4
; ZIP-NEXT: vse8.v v9, (a2)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_0_i8_two_source:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
+; ZVZIP-NEXT: vle8.v v8, (a1)
+; ZVZIP-NEXT: vle8.v v9, (a0)
+; ZVZIP-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
+; ZVZIP-NEXT: vnsrl.wi v8, v8, 0
+; ZVZIP-NEXT: vnsrl.wi v9, v9, 0
+; ZVZIP-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
+; ZVZIP-NEXT: vslideup.vi v9, v8, 4
+; ZVZIP-NEXT: vse8.v v9, (a2)
+; ZVZIP-NEXT: ret
entry:
%0 = load <8 x i8>, ptr %in0, align 1
%1 = load <8 x i8>, ptr %in1, align 1
@@ -822,6 +1004,19 @@ define void @vnsrl_8_8_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZIP-NEXT: vslideup.vi v9, v8, 4
; ZIP-NEXT: vse8.v v9, (a2)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_8_8_two_source:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
+; ZVZIP-NEXT: vle8.v v8, (a1)
+; ZVZIP-NEXT: vle8.v v9, (a0)
+; ZVZIP-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
+; ZVZIP-NEXT: vnsrl.wi v8, v8, 8
+; ZVZIP-NEXT: vnsrl.wi v9, v9, 8
+; ZVZIP-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
+; ZVZIP-NEXT: vslideup.vi v9, v8, 4
+; ZVZIP-NEXT: vse8.v v9, (a2)
+; ZVZIP-NEXT: ret
entry:
%0 = load <8 x i8>, ptr %in0, align 1
%1 = load <8 x i8>, ptr %in1, align 1
@@ -869,6 +1064,19 @@ define void @vnsrl_0_i16_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZIP-NEXT: vslideup.vi v9, v8, 2
; ZIP-NEXT: vse16.v v9, (a2)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_0_i16_two_source:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVZIP-NEXT: vle16.v v8, (a1)
+; ZVZIP-NEXT: vle16.v v9, (a0)
+; ZVZIP-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVZIP-NEXT: vnsrl.wi v8, v8, 0
+; ZVZIP-NEXT: vnsrl.wi v9, v9, 0
+; ZVZIP-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVZIP-NEXT: vslideup.vi v9, v8, 2
+; ZVZIP-NEXT: vse16.v v9, (a2)
+; ZVZIP-NEXT: ret
entry:
%0 = load <4 x i16>, ptr %in0, align 2
%1 = load <4 x i16>, ptr %in1, align 2
@@ -916,6 +1124,19 @@ define void @vnsrl_16_i16_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZIP-NEXT: vslideup.vi v9, v8, 2
; ZIP-NEXT: vse16.v v9, (a2)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_16_i16_two_source:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVZIP-NEXT: vle16.v v8, (a1)
+; ZVZIP-NEXT: vle16.v v9, (a0)
+; ZVZIP-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVZIP-NEXT: vnsrl.wi v8, v8, 16
+; ZVZIP-NEXT: vnsrl.wi v9, v9, 16
+; ZVZIP-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVZIP-NEXT: vslideup.vi v9, v8, 2
+; ZVZIP-NEXT: vse16.v v9, (a2)
+; ZVZIP-NEXT: ret
entry:
%0 = load <4 x i16>, ptr %in0, align 2
%1 = load <4 x i16>, ptr %in1, align 2
@@ -963,6 +1184,19 @@ define void @vnsrl_0_half_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZIP-NEXT: vslideup.vi v9, v8, 2
; ZIP-NEXT: vse16.v v9, (a2)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_0_half_two_source:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVZIP-NEXT: vle16.v v8, (a1)
+; ZVZIP-NEXT: vle16.v v9, (a0)
+; ZVZIP-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVZIP-NEXT: vnsrl.wi v8, v8, 0
+; ZVZIP-NEXT: vnsrl.wi v9, v9, 0
+; ZVZIP-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVZIP-NEXT: vslideup.vi v9, v8, 2
+; ZVZIP-NEXT: vse16.v v9, (a2)
+; ZVZIP-NEXT: ret
entry:
%0 = load <4 x half>, ptr %in0, align 2
%1 = load <4 x half>, ptr %in1, align 2
@@ -1010,6 +1244,19 @@ define void @vnsrl_16_half_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZIP-NEXT: vslideup.vi v9, v8, 2
; ZIP-NEXT: vse16.v v9, (a2)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_16_half_two_source:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVZIP-NEXT: vle16.v v8, (a1)
+; ZVZIP-NEXT: vle16.v v9, (a0)
+; ZVZIP-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVZIP-NEXT: vnsrl.wi v8, v8, 16
+; ZVZIP-NEXT: vnsrl.wi v9, v9, 16
+; ZVZIP-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVZIP-NEXT: vslideup.vi v9, v8, 2
+; ZVZIP-NEXT: vse16.v v9, (a2)
+; ZVZIP-NEXT: ret
entry:
%0 = load <4 x half>, ptr %in0, align 2
%1 = load <4 x half>, ptr %in1, align 2
@@ -1045,6 +1292,15 @@ define void @vnsrl_0_i32_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZIP-NEXT: vslideup.vi v8, v9, 1
; ZIP-NEXT: vse32.v v8, (a2)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_0_i32_two_source:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; ZVZIP-NEXT: vle32.v v8, (a0)
+; ZVZIP-NEXT: vle32.v v9, (a1)
+; ZVZIP-NEXT: vslideup.vi v8, v9, 1
+; ZVZIP-NEXT: vse32.v v8, (a2)
+; ZVZIP-NEXT: ret
entry:
%0 = load <2 x i32>, ptr %in0, align 4
%1 = load <2 x i32>, ptr %in1, align 4
@@ -1082,6 +1338,15 @@ define void @vnsrl_32_i32_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZIP-NEXT: ri.vzipodd.vv v10, v8, v9
; ZIP-NEXT: vse32.v v10, (a2)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_32_i32_two_source:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; ZVZIP-NEXT: vle32.v v8, (a0)
+; ZVZIP-NEXT: vle32.v v9, (a1)
+; ZVZIP-NEXT: vpairo.vv v10, v8, v9
+; ZVZIP-NEXT: vse32.v v10, (a2)
+; ZVZIP-NEXT: ret
entry:
%0 = load <2 x i32>, ptr %in0, align 4
%1 = load <2 x i32>, ptr %in1, align 4
@@ -1117,6 +1382,15 @@ define void @vnsrl_0_float_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZIP-NEXT: vslideup.vi v8, v9, 1
; ZIP-NEXT: vse32.v v8, (a2)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_0_float_two_source:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; ZVZIP-NEXT: vle32.v v8, (a0)
+; ZVZIP-NEXT: vle32.v v9, (a1)
+; ZVZIP-NEXT: vslideup.vi v8, v9, 1
+; ZVZIP-NEXT: vse32.v v8, (a2)
+; ZVZIP-NEXT: ret
entry:
%0 = load <2 x float>, ptr %in0, align 4
%1 = load <2 x float>, ptr %in1, align 4
@@ -1154,6 +1428,15 @@ define void @vnsrl_32_float_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZIP-NEXT: ri.vzipodd.vv v10, v8, v9
; ZIP-NEXT: vse32.v v10, (a2)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_32_float_two_source:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; ZVZIP-NEXT: vle32.v v8, (a0)
+; ZVZIP-NEXT: vle32.v v9, (a1)
+; ZVZIP-NEXT: vpairo.vv v10, v8, v9
+; ZVZIP-NEXT: vse32.v v10, (a2)
+; ZVZIP-NEXT: ret
entry:
%0 = load <2 x float>, ptr %in0, align 4
%1 = load <2 x float>, ptr %in1, align 4
@@ -1189,6 +1472,15 @@ define void @vnsrl_0_i64_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZIP-NEXT: vslideup.vi v8, v9, 1
; ZIP-NEXT: vse64.v v8, (a2)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_0_i64_two_source:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZVZIP-NEXT: vle64.v v8, (a0)
+; ZVZIP-NEXT: vle64.v v9, (a1)
+; ZVZIP-NEXT: vslideup.vi v8, v9, 1
+; ZVZIP-NEXT: vse64.v v8, (a2)
+; ZVZIP-NEXT: ret
entry:
%0 = load <2 x i64>, ptr %in0, align 8
%1 = load <2 x i64>, ptr %in1, align 8
@@ -1226,6 +1518,15 @@ define void @vnsrl_64_i64_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZIP-NEXT: ri.vzipodd.vv v10, v8, v9
; ZIP-NEXT: vse64.v v10, (a2)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_64_i64_two_source:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZVZIP-NEXT: vle64.v v8, (a0)
+; ZVZIP-NEXT: vle64.v v9, (a1)
+; ZVZIP-NEXT: vpairo.vv v10, v8, v9
+; ZVZIP-NEXT: vse64.v v10, (a2)
+; ZVZIP-NEXT: ret
entry:
%0 = load <2 x i64>, ptr %in0, align 8
%1 = load <2 x i64>, ptr %in1, align 8
@@ -1260,6 +1561,15 @@ define void @vnsrl_0_double_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZIP-NEXT: vslideup.vi v8, v9, 1
; ZIP-NEXT: vse64.v v8, (a2)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_0_double_two_source:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZVZIP-NEXT: vle64.v v8, (a0)
+; ZVZIP-NEXT: vle64.v v9, (a1)
+; ZVZIP-NEXT: vslideup.vi v8, v9, 1
+; ZVZIP-NEXT: vse64.v v8, (a2)
+; ZVZIP-NEXT: ret
entry:
%0 = load <2 x double>, ptr %in0, align 8
%1 = load <2 x double>, ptr %in1, align 8
@@ -1295,6 +1605,15 @@ define void @vnsrl_64_double_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZIP-NEXT: ri.vzipodd.vv v10, v8, v9
; ZIP-NEXT: vse64.v v10, (a2)
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vnsrl_64_double_two_source:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZVZIP-NEXT: vle64.v v8, (a0)
+; ZVZIP-NEXT: vle64.v v9, (a1)
+; ZVZIP-NEXT: vpairo.vv v10, v8, v9
+; ZVZIP-NEXT: vse64.v v10, (a2)
+; ZVZIP-NEXT: ret
entry:
%0 = load <2 x double>, ptr %in0, align 8
%1 = load <2 x double>, ptr %in1, align 8
@@ -1320,6 +1639,12 @@ define <2 x i64> @unzip2a_dual_v2i64(<2 x i64> %a, <2 x i64> %b) {
; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; ZIP-NEXT: vslideup.vi v8, v9, 1
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: unzip2a_dual_v2i64:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZVZIP-NEXT: vslideup.vi v8, v9, 1
+; ZVZIP-NEXT: ret
entry:
%c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %c
@@ -1370,6 +1695,17 @@ define <4 x i64> @unzip2a_dual_v4i64(<4 x i64> %a, <4 x i64> %b) {
; ZIP-NEXT: vslideup.vi v9, v11, 2
; ZIP-NEXT: vmv.v.v v8, v9
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: unzip2a_dual_v4i64:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 4, e64, m1, ta, mu
+; ZVZIP-NEXT: vmv.v.i v0, 8
+; ZVZIP-NEXT: vslideup.vi v10, v9, 2
+; ZVZIP-NEXT: vslideup.vi v10, v9, 1, v0.t
+; ZVZIP-NEXT: vmv.v.i v0, 12
+; ZVZIP-NEXT: vunzipe.v v11, v8
+; ZVZIP-NEXT: vmerge.vvm v8, v11, v10, v0
+; ZVZIP-NEXT: ret
entry:
%c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
ret <4 x i64> %c
@@ -1506,6 +1842,16 @@ define <16 x i64> @unzip2a_dual_v16i64(<16 x i64> %a, <16 x i64> %b) {
; ZIP-NEXT: vslideup.vi v12, v16, 8
; ZIP-NEXT: vmv.v.v v8, v12
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: unzip2a_dual_v16i64:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 8, e64, m2, ta, ma
+; ZVZIP-NEXT: vunzipe.v v16, v12
+; ZVZIP-NEXT: vunzipe.v v12, v8
+; ZVZIP-NEXT: vsetivli zero, 16, e64, m4, ta, ma
+; ZVZIP-NEXT: vslideup.vi v12, v16, 8
+; ZVZIP-NEXT: vmv.v.v v8, v12
+; ZVZIP-NEXT: ret
entry:
%c = shufflevector <16 x i64> %a, <16 x i64> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
ret <16 x i64> %c
@@ -1554,6 +1900,13 @@ define <4 x i64> @unzip2a_dual_v4i64_exact(<4 x i64> %a, <4 x i64> %b) vscale_ra
; ZIP-NEXT: ri.vunzip2a.vv v10, v8, v9
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: unzip2a_dual_v4i64_exact:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
+; ZVZIP-NEXT: vunzipe.v v10, v8
+; ZVZIP-NEXT: vmv.v.v v8, v10
+; ZVZIP-NEXT: ret
entry:
%c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
ret <4 x i64> %c
@@ -1603,6 +1956,20 @@ define <4 x i64> @unzip2a_dual_v4i64_exact_nf2(<4 x i64> %a, <4 x i64> %b) vscal
; ZIP-NEXT: ri.vunzip2a.vv v9, v8, v10
; ZIP-NEXT: vmv.v.v v8, v9
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: unzip2a_dual_v4i64_exact_nf2:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 4, e64, m1, ta, mu
+; ZVZIP-NEXT: vmv.v.i v0, 8
+; ZVZIP-NEXT: vslideup.vi v12, v9, 2
+; ZVZIP-NEXT: vslideup.vi v12, v9, 1, v0.t
+; ZVZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZVZIP-NEXT: vslidedown.vi v9, v8, 2
+; ZVZIP-NEXT: vmv.v.i v0, 12
+; ZVZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
+; ZVZIP-NEXT: vzip.vv v10, v8, v9
+; ZVZIP-NEXT: vmerge.vvm v8, v10, v12, v0
+; ZVZIP-NEXT: ret
entry:
%c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
ret <4 x i64> %c
@@ -1730,6 +2097,13 @@ define <16 x i64> @unzip2a_dual_v16i64_exact(<16 x i64> %a, <16 x i64> %b) vscal
; ZIP-NEXT: ri.vunzip2a.vv v16, v8, v12
; ZIP-NEXT: vmv.v.v v8, v16
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: unzip2a_dual_v16i64_exact:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 16, e64, m4, ta, ma
+; ZVZIP-NEXT: vunzipe.v v16, v8
+; ZVZIP-NEXT: vmv.v.v v8, v16
+; ZVZIP-NEXT: ret
entry:
%c = shufflevector <16 x i64> %a, <16 x i64> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
ret <16 x i64> %c
@@ -1781,6 +2155,18 @@ define <4 x i64> @unzip2b_dual_v4i64(<4 x i64> %a, <4 x i64> %b) {
; ZIP-NEXT: vslideup.vi v9, v11, 2
; ZIP-NEXT: vmv.v.v v8, v9
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: unzip2b_dual_v4i64:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZVZIP-NEXT: vmv1r.v v10, v9
+; ZVZIP-NEXT: vslidedown.vi v11, v9, 2
+; ZVZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
+; ZVZIP-NEXT: vunzipo.v v12, v8
+; ZVZIP-NEXT: vmv.v.i v0, 12
+; ZVZIP-NEXT: vzip.vv v8, v10, v11
+; ZVZIP-NEXT: vmerge.vvm v8, v12, v8, v0
+; ZVZIP-NEXT: ret
entry:
%c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
ret <4 x i64> %c
@@ -1830,6 +2216,13 @@ define <4 x i64> @unzip2b_dual_v4i64_exact(<4 x i64> %a, <4 x i64> %b) vscale_ra
; ZIP-NEXT: ri.vunzip2b.vv v10, v8, v9
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: unzip2b_dual_v4i64_exact:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
+; ZVZIP-NEXT: vunzipo.v v10, v8
+; ZVZIP-NEXT: vmv.v.v v8, v10
+; ZVZIP-NEXT: ret
entry:
%c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
ret <4 x i64> %c
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll
index 9bbfad4f32ffa..33b48a1a76196 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll
@@ -5,6 +5,8 @@
; RUN: llc -mtriple=riscv64 -mattr=+v,+m,+zvl512b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV64-V512
; RUN: llc -mtriple=riscv32 -mattr=+v,+m,+experimental-xrivosvizip -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZIP,RV32-ZIP
; RUN: llc -mtriple=riscv64 -mattr=+v,+m,+experimental-xrivosvizip -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZIP,RV64-ZIP
+; RUN: llc -mtriple=riscv32 -mattr=+v,+m,+experimental-zvzip -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVZIP,RV32-ZVZIP
+; RUN: llc -mtriple=riscv64 -mattr=+v,+m,+experimental-zvzip -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVZIP,RV64-ZVZIP
; Test optimizing interleaves to widening arithmetic.
@@ -24,6 +26,13 @@ define <4 x i8> @interleave_v2i8(<2 x i8> %x, <2 x i8> %y) {
; ZIP-NEXT: ri.vzip2a.vv v10, v8, v9
; ZIP-NEXT: vmv1r.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: interleave_v2i8:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZVZIP-NEXT: vzip.vv v10, v8, v9
+; ZVZIP-NEXT: vmv1r.v v8, v10
+; ZVZIP-NEXT: ret
%a = shufflevector <2 x i8> %x, <2 x i8> %y, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
ret <4 x i8> %a
}
@@ -44,6 +53,13 @@ define <4 x i16> @interleave_v2i16(<2 x i16> %x, <2 x i16> %y) {
; ZIP-NEXT: ri.vzip2a.vv v10, v8, v9
; ZIP-NEXT: vmv1r.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: interleave_v2i16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVZIP-NEXT: vzip.vv v10, v8, v9
+; ZVZIP-NEXT: vmv1r.v v8, v10
+; ZVZIP-NEXT: ret
%a = shufflevector <2 x i16> %x, <2 x i16> %y, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
ret <4 x i16> %a
}
@@ -65,6 +81,14 @@ define <4 x i32> @interleave_v2i32(<2 x i32> %x, <2 x i32> %y) {
; ZIP-NEXT: ri.vzip2a.vv v10, v9, v8
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: interleave_v2i32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVZIP-NEXT: vmv1r.v v10, v9
+; ZVZIP-NEXT: vmv1r.v v11, v8
+; ZVZIP-NEXT: vzip.vv v8, v10, v11
+; ZVZIP-NEXT: ret
%a = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
ret <4 x i32> %a
}
@@ -103,6 +127,14 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) {
; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: interleave_v2i64:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZVZIP-NEXT: vmv1r.v v14, v9
+; ZVZIP-NEXT: vmv1r.v v12, v8
+; ZVZIP-NEXT: vzip.vv v8, v12, v14
+; ZVZIP-NEXT: ret
%a = shufflevector <2 x i64> %x, <2 x i64> %y, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
ret <4 x i64> %a
}
@@ -133,6 +165,13 @@ define <8 x i8> @interleave_v4i8(<4 x i8> %x, <4 x i8> %y) {
; ZIP-NEXT: ri.vzip2a.vv v10, v9, v8
; ZIP-NEXT: vmv1r.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: interleave_v4i8:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; ZVZIP-NEXT: vzip.vv v10, v9, v8
+; ZVZIP-NEXT: vmv1r.v v8, v10
+; ZVZIP-NEXT: ret
%a = shufflevector <4 x i8> %x, <4 x i8> %y, <8 x i32> <i32 4, i32 0, i32 5, i32 1, i32 6, i32 2, i32 7, i32 3>
ret <8 x i8> %a
}
@@ -163,6 +202,14 @@ define <8 x i16> @interleave_v4i16(<4 x i16> %x, <4 x i16> %y) {
; ZIP-NEXT: ri.vzip2a.vv v10, v8, v9
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: interleave_v4i16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVZIP-NEXT: vmv1r.v v10, v9
+; ZVZIP-NEXT: vmv1r.v v11, v8
+; ZVZIP-NEXT: vzip.vv v8, v11, v10
+; ZVZIP-NEXT: ret
%a = shufflevector <4 x i16> %x, <4 x i16> %y, <8 x i32> <i32 0, i32 4, i32 poison, i32 5, i32 2, i32 poison, i32 3, i32 7>
ret <8 x i16> %a
}
@@ -194,6 +241,14 @@ define <8 x i32> @interleave_v4i32(<4 x i32> %x, <4 x i32> %y) {
; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: interleave_v4i32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZVZIP-NEXT: vmv1r.v v14, v9
+; ZVZIP-NEXT: vmv1r.v v12, v8
+; ZVZIP-NEXT: vzip.vv v8, v12, v14
+; ZVZIP-NEXT: ret
%a = shufflevector <4 x i32> %x, <4 x i32> %y, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
ret <8 x i32> %a
}
@@ -229,6 +284,15 @@ define <4 x i32> @interleave_v4i32_offset_2(<4 x i32> %x, <4 x i32> %y) {
; ZIP-NEXT: ri.vzip2a.vv v9, v8, v10
; ZIP-NEXT: vmv.v.v v8, v9
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: interleave_v4i32_offset_2:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; ZVZIP-NEXT: vmv1r.v v10, v8
+; ZVZIP-NEXT: vslidedown.vi v11, v9, 2
+; ZVZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVZIP-NEXT: vzip.vv v8, v10, v11
+; ZVZIP-NEXT: ret
%a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 6, i32 1, i32 7>
ret <4 x i32> %a
}
@@ -271,6 +335,17 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) {
; ZIP-NEXT: ri.vzip2a.vv v11, v8, v9
; ZIP-NEXT: vmerge.vvm v8, v11, v10, v0
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: interleave_v4i32_offset_1:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; ZVZIP-NEXT: vmv.v.i v0, 8
+; ZVZIP-NEXT: vmv1r.v v12, v9
+; ZVZIP-NEXT: vslideup.vi v12, v9, 1, v0.t
+; ZVZIP-NEXT: vmv.v.i v0, 10
+; ZVZIP-NEXT: vzip.vv v10, v8, v9
+; ZVZIP-NEXT: vmerge.vvm v8, v10, v12, v0
+; ZVZIP-NEXT: ret
%a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 1, i32 6>
ret <4 x i32> %a
}
@@ -300,6 +375,14 @@ define <16 x i8> @interleave_v8i8(<8 x i8> %x, <8 x i8> %y) {
; ZIP-NEXT: ri.vzip2a.vv v10, v8, v9
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: interleave_v8i8:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; ZVZIP-NEXT: vmv1r.v v10, v9
+; ZVZIP-NEXT: vmv1r.v v11, v8
+; ZVZIP-NEXT: vzip.vv v8, v11, v10
+; ZVZIP-NEXT: ret
%a = shufflevector <8 x i8> %x, <8 x i8> %y, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
ret <16 x i8> %a
}
@@ -332,6 +415,14 @@ define <16 x i16> @interleave_v8i16(<8 x i16> %x, <8 x i16> %y) {
; ZIP-NEXT: ri.vzip2a.vv v10, v12, v8
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: interleave_v8i16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVZIP-NEXT: vmv1r.v v14, v9
+; ZVZIP-NEXT: vmv1r.v v12, v8
+; ZVZIP-NEXT: vzip.vv v8, v14, v12
+; ZVZIP-NEXT: ret
%a = shufflevector <8 x i16> %x, <8 x i16> %y, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
ret <16 x i16> %a
}
@@ -363,6 +454,14 @@ define <16 x i32> @interleave_v8i32(<8 x i32> %x, <8 x i32> %y) {
; ZIP-NEXT: ri.vzip2a.vv v12, v8, v16
; ZIP-NEXT: vmv.v.v v8, v12
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: interleave_v8i32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; ZVZIP-NEXT: vmv2r.v v20, v10
+; ZVZIP-NEXT: vmv2r.v v16, v8
+; ZVZIP-NEXT: vzip.vv v8, v16, v20
+; ZVZIP-NEXT: ret
%a = shufflevector <8 x i32> %x, <8 x i32> %y, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
ret <16 x i32> %a
}
@@ -396,6 +495,16 @@ define <32 x i8> @interleave_v16i8(<16 x i8> %x, <16 x i8> %y) {
; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: interleave_v16i8:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; ZVZIP-NEXT: vmv1r.v v14, v9
+; ZVZIP-NEXT: vmv1r.v v12, v8
+; ZVZIP-NEXT: li a0, 32
+; ZVZIP-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; ZVZIP-NEXT: vzip.vv v8, v12, v14
+; ZVZIP-NEXT: ret
%a = shufflevector <16 x i8> %x, <16 x i8> %y, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
ret <32 x i8> %a
}
@@ -429,6 +538,16 @@ define <32 x i16> @interleave_v16i16(<16 x i16> %x, <16 x i16> %y) {
; ZIP-NEXT: ri.vzip2a.vv v12, v8, v16
; ZIP-NEXT: vmv.v.v v8, v12
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: interleave_v16i16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; ZVZIP-NEXT: vmv2r.v v20, v10
+; ZVZIP-NEXT: vmv2r.v v16, v8
+; ZVZIP-NEXT: li a0, 32
+; ZVZIP-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVZIP-NEXT: vzip.vv v8, v16, v20
+; ZVZIP-NEXT: ret
%a = shufflevector <16 x i16> %x, <16 x i16> %y, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
ret <32 x i16> %a
}
@@ -463,6 +582,16 @@ define <32 x i32> @interleave_v16i32(<16 x i32> %x, <16 x i32> %y) {
; ZIP-NEXT: ri.vzip2a.vv v16, v8, v24
; ZIP-NEXT: vmv.v.v v8, v16
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: interleave_v16i32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; ZVZIP-NEXT: vmv4r.v v16, v12
+; ZVZIP-NEXT: vmv4r.v v20, v8
+; ZVZIP-NEXT: vwaddu.vv v8, v20, v16
+; ZVZIP-NEXT: li a0, -1
+; ZVZIP-NEXT: vwmaccu.vx v8, a0, v16
+; ZVZIP-NEXT: ret
%a = shufflevector <16 x i32> %x, <16 x i32> %y, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
ret <32 x i32> %a
}
@@ -499,6 +628,16 @@ define <64 x i8> @interleave_v32i8(<32 x i8> %x, <32 x i8> %y) {
; ZIP-NEXT: ri.vzip2a.vv v12, v8, v16
; ZIP-NEXT: vmv.v.v v8, v12
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: interleave_v32i8:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; ZVZIP-NEXT: vmv2r.v v20, v10
+; ZVZIP-NEXT: vmv2r.v v16, v8
+; ZVZIP-NEXT: li a0, 64
+; ZVZIP-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; ZVZIP-NEXT: vzip.vv v8, v16, v20
+; ZVZIP-NEXT: ret
%a = shufflevector <32 x i8> %x, <32 x i8> %y, <64 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
ret <64 x i8> %a
}
@@ -537,6 +676,18 @@ define <64 x i16> @interleave_v32i16(<32 x i16> %x, <32 x i16> %y) {
; ZIP-NEXT: ri.vzip2a.vv v16, v8, v24
; ZIP-NEXT: vmv.v.v v8, v16
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: interleave_v32i16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; ZVZIP-NEXT: vmv4r.v v16, v12
+; ZVZIP-NEXT: vmv4r.v v20, v8
+; ZVZIP-NEXT: li a0, 32
+; ZVZIP-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVZIP-NEXT: vwaddu.vv v8, v20, v16
+; ZVZIP-NEXT: li a0, -1
+; ZVZIP-NEXT: vwmaccu.vx v8, a0, v16
+; ZVZIP-NEXT: ret
%a = shufflevector <32 x i16> %x, <32 x i16> %y, <64 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
ret <64 x i16> %a
}
@@ -677,6 +828,45 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) {
; ZIP-NEXT: addi sp, sp, 16
; ZIP-NEXT: .cfi_def_cfa_offset 0
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: interleave_v32i32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -16
+; ZVZIP-NEXT: .cfi_def_cfa_offset 16
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 3
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVZIP-NEXT: addi a0, sp, 16
+; ZVZIP-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVZIP-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVZIP-NEXT: vslidedown.vi v24, v16, 16
+; ZVZIP-NEXT: li a0, 32
+; ZVZIP-NEXT: lui a1, 699051
+; ZVZIP-NEXT: vslidedown.vi v0, v8, 16
+; ZVZIP-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; ZVZIP-NEXT: vzext.vf2 v8, v24
+; ZVZIP-NEXT: addi a1, a1, -1366
+; ZVZIP-NEXT: vzext.vf2 v24, v0
+; ZVZIP-NEXT: vmv.s.x v0, a1
+; ZVZIP-NEXT: vsll.vx v8, v8, a0
+; ZVZIP-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; ZVZIP-NEXT: vmerge.vvm v24, v24, v8, v0
+; ZVZIP-NEXT: addi a0, sp, 16
+; ZVZIP-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVZIP-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; ZVZIP-NEXT: vwaddu.vv v0, v8, v16
+; ZVZIP-NEXT: li a0, -1
+; ZVZIP-NEXT: vwmaccu.vx v0, a0, v16
+; ZVZIP-NEXT: vmv8r.v v8, v0
+; ZVZIP-NEXT: vmv8r.v v16, v24
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 3
+; ZVZIP-NEXT: add sp, sp, a0
+; ZVZIP-NEXT: .cfi_def_cfa sp, 16
+; ZVZIP-NEXT: addi sp, sp, 16
+; ZVZIP-NEXT: .cfi_def_cfa_offset 0
+; ZVZIP-NEXT: ret
%a = shufflevector <32 x i32> %x, <32 x i32> %y, <64 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
ret <64 x i32> %a
}
@@ -711,6 +901,15 @@ define <4 x i8> @unary_interleave_v4i8(<4 x i8> %x) {
; ZIP-NEXT: ri.vzip2a.vv v9, v8, v10
; ZIP-NEXT: vmv1r.v v8, v9
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: unary_interleave_v4i8:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; ZVZIP-NEXT: vslidedown.vi v10, v8, 2
+; ZVZIP-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZVZIP-NEXT: vzip.vv v9, v8, v10
+; ZVZIP-NEXT: vmv1r.v v8, v9
+; ZVZIP-NEXT: ret
%a = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
ret <4 x i8> %a
}
@@ -749,6 +948,17 @@ define <4 x i8> @unary_interleave_v4i8_invalid(<4 x i8> %x) {
; ZIP-NEXT: vrgather.vv v9, v8, v10
; ZIP-NEXT: vmv1r.v v8, v9
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: unary_interleave_v4i8_invalid:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: lui a0, 16
+; ZVZIP-NEXT: addi a0, a0, 768
+; ZVZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVZIP-NEXT: vmv.s.x v10, a0
+; ZVZIP-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; ZVZIP-NEXT: vrgather.vv v9, v8, v10
+; ZVZIP-NEXT: vmv1r.v v8, v9
+; ZVZIP-NEXT: ret
%a = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 4>
ret <4 x i8> %a
}
@@ -783,6 +993,15 @@ define <4 x i16> @unary_interleave_v4i16(<4 x i16> %x) {
; ZIP-NEXT: ri.vzip2a.vv v9, v8, v10
; ZIP-NEXT: vmv1r.v v8, v9
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: unary_interleave_v4i16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; ZVZIP-NEXT: vslidedown.vi v10, v8, 2
+; ZVZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVZIP-NEXT: vzip.vv v9, v8, v10
+; ZVZIP-NEXT: vmv1r.v v8, v9
+; ZVZIP-NEXT: ret
%a = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
ret <4 x i16> %a
}
@@ -817,6 +1036,15 @@ define <4 x i32> @unary_interleave_v4i32(<4 x i32> %x) {
; ZIP-NEXT: ri.vzip2a.vv v9, v8, v10
; ZIP-NEXT: vmv.v.v v8, v9
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: unary_interleave_v4i32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; ZVZIP-NEXT: vmv1r.v v10, v8
+; ZVZIP-NEXT: vslidedown.vi v11, v8, 2
+; ZVZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVZIP-NEXT: vzip.vv v8, v10, v11
+; ZVZIP-NEXT: ret
%a = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
ret <4 x i32> %a
}
@@ -868,6 +1096,15 @@ define <4 x i64> @unary_interleave_v4i64(<4 x i64> %x) {
; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: unary_interleave_v4i64:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 2, e64, m2, ta, ma
+; ZVZIP-NEXT: vmv2r.v v12, v8
+; ZVZIP-NEXT: vslidedown.vi v14, v8, 2
+; ZVZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZVZIP-NEXT: vzip.vv v8, v12, v14
+; ZVZIP-NEXT: ret
%a = shufflevector <4 x i64> %x, <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
ret <4 x i64> %a
}
@@ -902,6 +1139,15 @@ define <8 x i8> @unary_interleave_v8i8(<8 x i8> %x) {
; ZIP-NEXT: ri.vzip2a.vv v9, v8, v10
; ZIP-NEXT: vmv1r.v v8, v9
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: unary_interleave_v8i8:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; ZVZIP-NEXT: vslidedown.vi v10, v8, 4
+; ZVZIP-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; ZVZIP-NEXT: vzip.vv v9, v8, v10
+; ZVZIP-NEXT: vmv1r.v v8, v9
+; ZVZIP-NEXT: ret
%a = shufflevector <8 x i8> %x, <8 x i8> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 poison, i32 6, i32 3, i32 7>
ret <8 x i8> %a
}
@@ -936,6 +1182,15 @@ define <8 x i16> @unary_interleave_v8i16(<8 x i16> %x) {
; ZIP-NEXT: ri.vzip2a.vv v9, v10, v8
; ZIP-NEXT: vmv.v.v v8, v9
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: unary_interleave_v8i16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 4, e16, m1, ta, ma
+; ZVZIP-NEXT: vmv1r.v v10, v8
+; ZVZIP-NEXT: vslidedown.vi v11, v8, 4
+; ZVZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVZIP-NEXT: vzip.vv v8, v11, v10
+; ZVZIP-NEXT: ret
%a = shufflevector <8 x i16> %x, <8 x i16> poison, <8 x i32> <i32 4, i32 poison, i32 5, i32 1, i32 6, i32 2, i32 7, i32 3>
ret <8 x i16> %a
}
@@ -970,6 +1225,15 @@ define <8 x i32> @unary_interleave_v8i32(<8 x i32> %x) {
; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: unary_interleave_v8i32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 4, e32, m2, ta, ma
+; ZVZIP-NEXT: vmv2r.v v12, v8
+; ZVZIP-NEXT: vslidedown.vi v14, v8, 4
+; ZVZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZVZIP-NEXT: vzip.vv v8, v12, v14
+; ZVZIP-NEXT: ret
%a = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
ret <8 x i32> %a
}
@@ -992,6 +1256,14 @@ define <4 x i8> @unary_interleave_10uu_v4i8(<4 x i8> %x) {
; ZIP-NEXT: vsll.vi v8, v8, 8
; ZIP-NEXT: vor.vv v8, v8, v9
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: unary_interleave_10uu_v4i8:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVZIP-NEXT: vsrl.vi v9, v8, 8
+; ZVZIP-NEXT: vsll.vi v8, v8, 8
+; ZVZIP-NEXT: vor.vv v8, v8, v9
+; ZVZIP-NEXT: ret
%a = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 1, i32 0, i32 poison, i32 poison>
ret <4 x i8> %a
}
@@ -1023,6 +1295,14 @@ define <16 x i16> @interleave_slp(<8 x i16> %v0, <8 x i16> %v1) {
; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: interleave_slp:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVZIP-NEXT: vmv1r.v v14, v9
+; ZVZIP-NEXT: vmv1r.v v12, v8
+; ZVZIP-NEXT: vzip.vv v8, v12, v14
+; ZVZIP-NEXT: ret
entry:
%v2 = shufflevector <8 x i16> %v0, <8 x i16> poison, <16 x i32> <i32 0, i32 poison, i32 1, i32 poison, i32 2, i32 poison, i32 3, i32 poison, i32 4, i32 poison, i32 5, i32 poison, i32 6, i32 poison, i32 7, i32 poison>
%v3 = shufflevector <8 x i16> %v1, <8 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
@@ -1033,5 +1313,7 @@ entry:
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32-V128: {{.*}}
; RV32-ZIP: {{.*}}
+; RV32-ZVZIP: {{.*}}
; RV64-V128: {{.*}}
; RV64-ZIP: {{.*}}
+; RV64-ZVZIP: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-zipeven-zipodd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-zipeven-zipodd.ll
index 14c17a65c6a0f..d284047bde483 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-zipeven-zipodd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-zipeven-zipodd.ll
@@ -3,6 +3,8 @@
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+experimental-xrivosvizip -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZIP,ZIP-RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+experimental-xrivosvizip -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZIP,ZIP-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+experimental-zvzip -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVZIP,ZVZIP-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+experimental-zvzip -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVZIP,ZVZIP-RV64
define <4 x i32> @zipeven_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: zipeven_v4i32:
@@ -18,6 +20,13 @@ define <4 x i32> @zipeven_v4i32(<4 x i32> %a, <4 x i32> %b) {
; ZIP-NEXT: ri.vzipeven.vv v10, v8, v9
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: zipeven_v4i32:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVZIP-NEXT: vpaire.vv v10, v8, v9
+; ZVZIP-NEXT: vmv.v.v v8, v10
+; ZVZIP-NEXT: ret
entry:
%c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
ret <4 x i32> %c
@@ -38,6 +47,13 @@ define <4 x i32> @zipeven_v4i32_swapped(<4 x i32> %a, <4 x i32> %b) {
; ZIP-NEXT: ri.vzipeven.vv v10, v9, v8
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: zipeven_v4i32_swapped:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVZIP-NEXT: vpaire.vv v10, v9, v8
+; ZVZIP-NEXT: vmv.v.v v8, v10
+; ZVZIP-NEXT: ret
entry:
%c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
ret <4 x i32> %c
@@ -58,6 +74,13 @@ define <4 x i64> @zipeven_v4i64(<4 x i64> %a, <4 x i64> %b) {
; ZIP-NEXT: ri.vzipeven.vv v12, v8, v10
; ZIP-NEXT: vmv.v.v v8, v12
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: zipeven_v4i64:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZVZIP-NEXT: vpaire.vv v12, v8, v10
+; ZVZIP-NEXT: vmv.v.v v8, v12
+; ZVZIP-NEXT: ret
entry:
%c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
ret <4 x i64> %c
@@ -77,6 +100,13 @@ define <4 x half> @zipeven_v4f16(<4 x half> %a, <4 x half> %b) {
; ZIP-NEXT: ri.vzipeven.vv v10, v8, v9
; ZIP-NEXT: vmv1r.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: zipeven_v4f16:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVZIP-NEXT: vpaire.vv v10, v8, v9
+; ZVZIP-NEXT: vmv1r.v v8, v10
+; ZVZIP-NEXT: ret
entry:
%c = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
ret <4 x half> %c
@@ -96,6 +126,13 @@ define <4 x float> @zipeven_v4f32(<4 x float> %a, <4 x float> %b) {
; ZIP-NEXT: ri.vzipeven.vv v10, v8, v9
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: zipeven_v4f32:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVZIP-NEXT: vpaire.vv v10, v8, v9
+; ZVZIP-NEXT: vmv.v.v v8, v10
+; ZVZIP-NEXT: ret
entry:
%c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
ret <4 x float> %c
@@ -116,6 +153,13 @@ define <4 x double> @zipeven_v4f64(<4 x double> %a, <4 x double> %b) {
; ZIP-NEXT: ri.vzipeven.vv v12, v8, v10
; ZIP-NEXT: vmv.v.v v8, v12
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: zipeven_v4f64:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZVZIP-NEXT: vpaire.vv v12, v8, v10
+; ZVZIP-NEXT: vmv.v.v v8, v12
+; ZVZIP-NEXT: ret
entry:
%c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
ret <4 x double> %c
@@ -137,6 +181,13 @@ define <4 x i32> @zipodd_v4i32(<4 x i32> %a, <4 x i32> %b) {
; ZIP-NEXT: ri.vzipodd.vv v10, v8, v9
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: zipodd_v4i32:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVZIP-NEXT: vpairo.vv v10, v8, v9
+; ZVZIP-NEXT: vmv.v.v v8, v10
+; ZVZIP-NEXT: ret
entry:
%c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
ret <4 x i32> %c
@@ -156,6 +207,13 @@ define <4 x i32> @zipodd_v4i32_swapped(<4 x i32> %a, <4 x i32> %b) {
; ZIP-NEXT: ri.vzipodd.vv v10, v9, v8
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: zipodd_v4i32_swapped:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVZIP-NEXT: vpairo.vv v10, v9, v8
+; ZVZIP-NEXT: vmv.v.v v8, v10
+; ZVZIP-NEXT: ret
entry:
%c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 5, i32 1, i32 7, i32 3>
ret <4 x i32> %c
@@ -172,6 +230,10 @@ define <4 x i32> @zipeven_v4i32_single(<4 x i32> %a) {
; ZIP-LABEL: zipeven_v4i32_single:
; ZIP: # %bb.0: # %entry
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: zipeven_v4i32_single:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: ret
entry:
%c = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 2, i32 poison>
ret <4 x i32> %c
@@ -192,6 +254,12 @@ define <4 x i32> @zipodd_v4i32_single(<4 x i32> %a) {
; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; ZIP-NEXT: vslidedown.vi v8, v8, 1
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: zipodd_v4i32_single:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVZIP-NEXT: vslidedown.vi v8, v8, 1
+; ZVZIP-NEXT: ret
entry:
%c = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 3, i32 poison>
ret <4 x i32> %c
@@ -211,6 +279,13 @@ define <4 x i32> @zipodd_v4i32_both(<4 x i32> %a) {
; ZIP-NEXT: ri.vzipodd.vv v9, v8, v8
; ZIP-NEXT: vmv.v.v v8, v9
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: zipodd_v4i32_both:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVZIP-NEXT: vpairo.vv v9, v8, v8
+; ZVZIP-NEXT: vmv.v.v v8, v9
+; ZVZIP-NEXT: ret
entry:
%c = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
ret <4 x i32> %c
@@ -232,6 +307,13 @@ define <4 x i32> @zipeven_v4i32_both(<4 x i32> %a) {
; ZIP-NEXT: ri.vzipeven.vv v9, v8, v8
; ZIP-NEXT: vmv.v.v v8, v9
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: zipeven_v4i32_both:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVZIP-NEXT: vpaire.vv v9, v8, v8
+; ZVZIP-NEXT: vmv.v.v v8, v9
+; ZVZIP-NEXT: ret
entry:
%c = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
ret <4 x i32> %c
@@ -249,6 +331,12 @@ define <4 x i32> @zipeven_v4i32_partial(<4 x i32> %a, <4 x i32> %b) {
; ZIP-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; ZIP-NEXT: vslideup.vi v8, v9, 1
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: zipeven_v4i32_partial:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; ZVZIP-NEXT: vslideup.vi v8, v9, 1
+; ZVZIP-NEXT: ret
entry:
%c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 poison>
ret <4 x i32> %c
@@ -269,6 +357,13 @@ define <4 x i32> @zipodd_v4i32_partial(<4 x i32> %a, <4 x i32> %b) {
; ZIP-NEXT: ri.vzipodd.vv v10, v8, v9
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: zipodd_v4i32_partial:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVZIP-NEXT: vpairo.vv v10, v8, v9
+; ZVZIP-NEXT: vmv.v.v v8, v10
+; ZVZIP-NEXT: ret
entry:
%c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 poison>
ret <4 x i32> %c
@@ -289,6 +384,13 @@ define <8 x i32> @zipeven_v8i32(<8 x i32> %v1, <8 x i32> %v2) {
; ZIP-NEXT: ri.vzipeven.vv v12, v8, v10
; ZIP-NEXT: vmv.v.v v8, v12
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: zipeven_v8i32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZVZIP-NEXT: vpaire.vv v12, v8, v10
+; ZVZIP-NEXT: vmv.v.v v8, v12
+; ZVZIP-NEXT: ret
%out = shufflevector <8 x i32> %v1, <8 x i32> %v2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
ret <8 x i32> %out
}
@@ -309,6 +411,13 @@ define <8 x i32> @zipodd_v8i32(<8 x i32> %v1, <8 x i32> %v2) {
; ZIP-NEXT: ri.vzipodd.vv v12, v8, v10
; ZIP-NEXT: vmv.v.v v8, v12
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: zipodd_v8i32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZVZIP-NEXT: vpairo.vv v12, v8, v10
+; ZVZIP-NEXT: vmv.v.v v8, v12
+; ZVZIP-NEXT: ret
%out = shufflevector <8 x i32> %v1, <8 x i32> %v2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
ret <8 x i32> %out
}
@@ -329,6 +438,13 @@ define <16 x i64> @zipeven_v16i64(<16 x i64> %v1, <16 x i64> %v2) {
; ZIP-NEXT: ri.vzipeven.vv v24, v8, v16
; ZIP-NEXT: vmv.v.v v8, v24
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: zipeven_v16i64:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; ZVZIP-NEXT: vpaire.vv v24, v8, v16
+; ZVZIP-NEXT: vmv.v.v v8, v24
+; ZVZIP-NEXT: ret
%out = shufflevector <16 x i64> %v1, <16 x i64> %v2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
ret <16 x i64> %out
}
@@ -350,6 +466,13 @@ define <16 x i64> @zipodd_v16i64(<16 x i64> %v1, <16 x i64> %v2) {
; ZIP-NEXT: ri.vzipodd.vv v24, v8, v16
; ZIP-NEXT: vmv.v.v v8, v24
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: zipodd_v16i64:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; ZVZIP-NEXT: vpairo.vv v24, v8, v16
+; ZVZIP-NEXT: vmv.v.v v8, v24
+; ZVZIP-NEXT: ret
%out = shufflevector <16 x i64> %v1, <16 x i64> %v2, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
ret <16 x i64> %out
}
@@ -369,6 +492,13 @@ define <8 x i32> @zipeven_v8i32_as_v4i64(<8 x i32> %v1, <8 x i32> %v2) {
; ZIP-NEXT: ri.vzipeven.vv v12, v8, v10
; ZIP-NEXT: vmv.v.v v8, v12
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: zipeven_v8i32_as_v4i64:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZVZIP-NEXT: vpaire.vv v12, v8, v10
+; ZVZIP-NEXT: vmv.v.v v8, v12
+; ZVZIP-NEXT: ret
%out = shufflevector <8 x i32> %v1, <8 x i32> %v2, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 12, i32 13>
ret <8 x i32> %out
}
@@ -389,6 +519,13 @@ define <8 x i32> @zipodd_v8i32_as_v4i64(<8 x i32> %v1, <8 x i32> %v2) {
; ZIP-NEXT: ri.vzipodd.vv v12, v8, v10
; ZIP-NEXT: vmv.v.v v8, v12
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: zipodd_v8i32_as_v4i64:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZVZIP-NEXT: vpairo.vv v12, v8, v10
+; ZVZIP-NEXT: vmv.v.v v8, v12
+; ZVZIP-NEXT: ret
%out = shufflevector <8 x i32> %v1, <8 x i32> %v2, <8 x i32> <i32 2, i32 3, i32 10, i32 11, i32 6, i32 7, i32 14, i32 15>
ret <8 x i32> %out
}
@@ -398,3 +535,5 @@ define <8 x i32> @zipodd_v8i32_as_v4i64(<8 x i32> %v1, <8 x i32> %v2) {
; RV64: {{.*}}
; ZIP-RV32: {{.*}}
; ZIP-RV64: {{.*}}
+; ZVZIP-RV32: {{.*}}
+; ZVZIP-RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
index 5c04a09c9953b..a16749e7673bb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
@@ -2,6 +2,7 @@
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+m,+zvfh | FileCheck %s --check-prefixes=CHECK,V,RV32
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+m,+zvfh | FileCheck %s --check-prefixes=CHECK,V,RV64
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+m,+zvfh,+experimental-xrivosvizip | FileCheck %s --check-prefixes=CHECK,ZIP
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+m,+zvfh,+experimental-zvzip | FileCheck %s --check-prefixes=CHECK,ZVZIP
; Integers
@@ -91,6 +92,15 @@ define {<2 x i64>, <2 x i64>} @vector_deinterleave_v2i64_v4i64(<4 x i64> %vec) {
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: vmv.v.v v9, v11
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_v2i64_v4i64:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZVZIP-NEXT: vunzipe.v v10, v8
+; ZVZIP-NEXT: vunzipo.v v11, v8
+; ZVZIP-NEXT: vmv.v.v v8, v10
+; ZVZIP-NEXT: vmv.v.v v9, v11
+; ZVZIP-NEXT: ret
%retval = call {<2 x i64>, <2 x i64>} @llvm.vector.deinterleave2.v4i64(<4 x i64> %vec)
ret {<2 x i64>, <2 x i64>} %retval
}
@@ -134,6 +144,15 @@ define {<4 x i64>, <4 x i64>} @vector_deinterleave_v4i64_v8i64(<8 x i64> %vec) {
; ZIP-NEXT: vmv.v.v v8, v12
; ZIP-NEXT: vmv.v.v v10, v14
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_v4i64_v8i64:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZVZIP-NEXT: vunzipe.v v12, v8
+; ZVZIP-NEXT: vunzipo.v v14, v8
+; ZVZIP-NEXT: vmv.v.v v8, v12
+; ZVZIP-NEXT: vmv.v.v v10, v14
+; ZVZIP-NEXT: ret
%retval = call {<4 x i64>, <4 x i64>} @llvm.vector.deinterleave2.v8i64(<8 x i64> %vec)
ret {<4 x i64>, <4 x i64>} %retval
}
@@ -172,6 +191,15 @@ define {<8 x i64>, <8 x i64>} @vector_deinterleave_v8i64_v16i64(<16 x i64> %vec)
; ZIP-NEXT: vmv.v.v v8, v16
; ZIP-NEXT: vmv.v.v v12, v20
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_v8i64_v16i64:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; ZVZIP-NEXT: vunzipe.v v16, v8
+; ZVZIP-NEXT: vunzipo.v v20, v8
+; ZVZIP-NEXT: vmv.v.v v8, v16
+; ZVZIP-NEXT: vmv.v.v v12, v20
+; ZVZIP-NEXT: ret
%retval = call {<8 x i64>, <8 x i64>} @llvm.vector.deinterleave2.v16i64(<16 x i64> %vec)
ret {<8 x i64>, <8 x i64>} %retval
}
@@ -475,6 +503,15 @@ define {<2 x double>, <2 x double>} @vector_deinterleave_v2f64_v4f64(<4 x double
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: vmv.v.v v9, v12
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_v2f64_v4f64:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZVZIP-NEXT: vunzipe.v v10, v8
+; ZVZIP-NEXT: vunzipo.v v11, v8
+; ZVZIP-NEXT: vmv.v.v v8, v10
+; ZVZIP-NEXT: vmv.v.v v9, v11
+; ZVZIP-NEXT: ret
%retval = call {<2 x double>, <2 x double>} @llvm.vector.deinterleave2.v4f64(<4 x double> %vec)
ret {<2 x double>, <2 x double>} %retval
}
@@ -518,6 +555,15 @@ define {<4 x double>, <4 x double>} @vector_deinterleave_v4f64_v8f64(<8 x double
; ZIP-NEXT: vmv.v.v v8, v12
; ZIP-NEXT: vmv.v.v v10, v16
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_v4f64_v8f64:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZVZIP-NEXT: vunzipe.v v12, v8
+; ZVZIP-NEXT: vunzipo.v v14, v8
+; ZVZIP-NEXT: vmv.v.v v8, v12
+; ZVZIP-NEXT: vmv.v.v v10, v14
+; ZVZIP-NEXT: ret
%retval = call {<4 x double>, <4 x double>} @llvm.vector.deinterleave2.v8f64(<8 x double> %vec)
ret {<4 x double>, <4 x double>} %retval
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
index ac9f26314a9ab..8e928b66c51a1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
@@ -2,6 +2,7 @@
; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v,+zvfhmin,+zvfbfmin | FileCheck %s --check-prefixes=CHECK,V
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+zvfhmin,+zvfbfmin | FileCheck %s --check-prefixes=CHECK,V
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+zvfhmin,+zvfbfmin,+experimental-xrivosvizip | FileCheck %s --check-prefixes=CHECK,ZIP
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+zvfhmin,+zvfbfmin,+experimental-zvzip | FileCheck %s --check-prefixes=CHECK,ZVZIP
; Integers
@@ -39,6 +40,23 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>} @vector_deinterleave_nxv16i1_nxv
; ZIP-NEXT: vmsne.vi v0, v12, 0
; ZIP-NEXT: vmsne.vi v8, v14, 0
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_nxv16i1_nxv32i1:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; ZVZIP-NEXT: vmv.v.i v10, 0
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: vmerge.vim v8, v10, 1, v0
+; ZVZIP-NEXT: srli a0, a0, 2
+; ZVZIP-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; ZVZIP-NEXT: vslidedown.vx v0, v0, a0
+; ZVZIP-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; ZVZIP-NEXT: vmerge.vim v10, v10, 1, v0
+; ZVZIP-NEXT: vunzipe.v v12, v8
+; ZVZIP-NEXT: vunzipo.v v14, v8
+; ZVZIP-NEXT: vmsne.vi v0, v12, 0
+; ZVZIP-NEXT: vmsne.vi v8, v14, 0
+; ZVZIP-NEXT: ret
%retval = call {<vscale x 16 x i1>, <vscale x 16 x i1>} @llvm.vector.deinterleave2.nxv32i1(<vscale x 32 x i1> %vec)
ret {<vscale x 16 x i1>, <vscale x 16 x i1>} %retval
}
@@ -61,6 +79,15 @@ define {<vscale x 16 x i8>, <vscale x 16 x i8>} @vector_deinterleave_nxv16i8_nxv
; ZIP-NEXT: vmv.v.v v8, v12
; ZIP-NEXT: vmv.v.v v10, v14
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_nxv16i8_nxv32i8:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; ZVZIP-NEXT: vunzipe.v v12, v8
+; ZVZIP-NEXT: vunzipo.v v14, v8
+; ZVZIP-NEXT: vmv.v.v v8, v12
+; ZVZIP-NEXT: vmv.v.v v10, v14
+; ZVZIP-NEXT: ret
%retval = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %vec)
ret {<vscale x 16 x i8>, <vscale x 16 x i8>} %retval
}
@@ -83,6 +110,15 @@ define {<vscale x 8 x i16>, <vscale x 8 x i16>} @vector_deinterleave_nxv8i16_nxv
; ZIP-NEXT: vmv.v.v v8, v12
; ZIP-NEXT: vmv.v.v v10, v14
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_nxv8i16_nxv16i16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVZIP-NEXT: vunzipe.v v12, v8
+; ZVZIP-NEXT: vunzipo.v v14, v8
+; ZVZIP-NEXT: vmv.v.v v8, v12
+; ZVZIP-NEXT: vmv.v.v v10, v14
+; ZVZIP-NEXT: ret
%retval = call {<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.vector.deinterleave2.nxv16i16(<vscale x 16 x i16> %vec)
ret {<vscale x 8 x i16>, <vscale x 8 x i16>} %retval
}
@@ -106,6 +142,15 @@ define {<vscale x 4 x i32>, <vscale x 4 x i32>} @vector_deinterleave_nxv4i32_nxv
; ZIP-NEXT: vmv.v.v v8, v12
; ZIP-NEXT: vmv.v.v v10, v14
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_nxv4i32_nxvv8i32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; ZVZIP-NEXT: vunzipe.v v12, v8
+; ZVZIP-NEXT: vunzipo.v v14, v8
+; ZVZIP-NEXT: vmv.v.v v8, v12
+; ZVZIP-NEXT: vmv.v.v v10, v14
+; ZVZIP-NEXT: ret
%retval = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %vec)
ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %retval
}
@@ -133,6 +178,15 @@ define {<vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv
; ZIP-NEXT: vmv.v.v v8, v12
; ZIP-NEXT: vmv.v.v v10, v14
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_nxv2i64_nxv4i64:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; ZVZIP-NEXT: vunzipe.v v12, v8
+; ZVZIP-NEXT: vunzipo.v v14, v8
+; ZVZIP-NEXT: vmv.v.v v8, v12
+; ZVZIP-NEXT: vmv.v.v v10, v14
+; ZVZIP-NEXT: ret
%retval = call {<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %vec)
ret {<vscale x 2 x i64>, <vscale x 2 x i64>} %retval
}
@@ -160,6 +214,15 @@ define {<vscale x 4 x i64>, <vscale x 4 x i64>} @vector_deinterleave_nxv4i64_nxv
; ZIP-NEXT: vmv.v.v v8, v16
; ZIP-NEXT: vmv.v.v v12, v20
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_nxv4i64_nxv8i64:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; ZVZIP-NEXT: vunzipe.v v16, v8
+; ZVZIP-NEXT: vunzipo.v v20, v8
+; ZVZIP-NEXT: vmv.v.v v8, v16
+; ZVZIP-NEXT: vmv.v.v v12, v20
+; ZVZIP-NEXT: ret
%retval = call {<vscale x 4 x i64>, <vscale x 4 x i64>} @llvm.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> %vec)
ret {<vscale x 4 x i64>, <vscale x 4 x i64>} %retval
}
@@ -203,6 +266,24 @@ define {<vscale x 64 x i1>, <vscale x 64 x i1>} @vector_deinterleave_nxv64i1_nxv
; ZIP-NEXT: vmsne.vi v8, v0, 0
; ZIP-NEXT: vmv1r.v v0, v16
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_nxv64i1_nxv128i1:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; ZVZIP-NEXT: vmv.v.i v24, 0
+; ZVZIP-NEXT: vmerge.vim v16, v24, 1, v0
+; ZVZIP-NEXT: vmv1r.v v0, v8
+; ZVZIP-NEXT: vmerge.vim v24, v24, 1, v0
+; ZVZIP-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; ZVZIP-NEXT: vunzipe.v v8, v16
+; ZVZIP-NEXT: vunzipo.v v0, v16
+; ZVZIP-NEXT: vunzipe.v v12, v24
+; ZVZIP-NEXT: vunzipo.v v4, v24
+; ZVZIP-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; ZVZIP-NEXT: vmsne.vi v16, v8, 0
+; ZVZIP-NEXT: vmsne.vi v8, v0, 0
+; ZVZIP-NEXT: vmv1r.v v0, v16
+; ZVZIP-NEXT: ret
%retval = call {<vscale x 64 x i1>, <vscale x 64 x i1>} @llvm.vector.deinterleave2.nxv128i1(<vscale x 128 x i1> %vec)
ret {<vscale x 64 x i1>, <vscale x 64 x i1>} %retval
}
@@ -229,6 +310,17 @@ define {<vscale x 64 x i8>, <vscale x 64 x i8>} @vector_deinterleave_nxv64i8_nxv
; ZIP-NEXT: vmv8r.v v8, v24
; ZIP-NEXT: vmv8r.v v16, v0
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_nxv64i8_nxv128i8:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; ZVZIP-NEXT: vmv8r.v v24, v8
+; ZVZIP-NEXT: vunzipe.v v8, v24
+; ZVZIP-NEXT: vunzipo.v v0, v24
+; ZVZIP-NEXT: vunzipe.v v12, v16
+; ZVZIP-NEXT: vunzipo.v v4, v16
+; ZVZIP-NEXT: vmv8r.v v16, v0
+; ZVZIP-NEXT: ret
%retval = call {<vscale x 64 x i8>, <vscale x 64 x i8>} @llvm.vector.deinterleave2.nxv128i8(<vscale x 128 x i8> %vec)
ret {<vscale x 64 x i8>, <vscale x 64 x i8>} %retval
}
@@ -255,6 +347,17 @@ define {<vscale x 32 x i16>, <vscale x 32 x i16>} @vector_deinterleave_nxv32i16_
; ZIP-NEXT: vmv8r.v v8, v24
; ZIP-NEXT: vmv8r.v v16, v0
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_nxv32i16_nxv64i16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVZIP-NEXT: vmv8r.v v24, v8
+; ZVZIP-NEXT: vunzipe.v v8, v24
+; ZVZIP-NEXT: vunzipo.v v0, v24
+; ZVZIP-NEXT: vunzipe.v v12, v16
+; ZVZIP-NEXT: vunzipo.v v4, v16
+; ZVZIP-NEXT: vmv8r.v v16, v0
+; ZVZIP-NEXT: ret
%retval = call {<vscale x 32 x i16>, <vscale x 32 x i16>} @llvm.vector.deinterleave2.nxv64i16(<vscale x 64 x i16> %vec)
ret {<vscale x 32 x i16>, <vscale x 32 x i16>} %retval
}
@@ -282,6 +385,17 @@ define {<vscale x 16 x i32>, <vscale x 16 x i32>} @vector_deinterleave_nxv16i32_
; ZIP-NEXT: vmv8r.v v8, v24
; ZIP-NEXT: vmv8r.v v16, v0
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_nxv16i32_nxvv32i32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; ZVZIP-NEXT: vmv8r.v v24, v8
+; ZVZIP-NEXT: vunzipe.v v8, v24
+; ZVZIP-NEXT: vunzipo.v v0, v24
+; ZVZIP-NEXT: vunzipe.v v12, v16
+; ZVZIP-NEXT: vunzipo.v v4, v16
+; ZVZIP-NEXT: vmv8r.v v16, v0
+; ZVZIP-NEXT: ret
%retval = call {<vscale x 16 x i32>, <vscale x 16 x i32>} @llvm.vector.deinterleave2.nxv32i32(<vscale x 32 x i32> %vec)
ret {<vscale x 16 x i32>, <vscale x 16 x i32>} %retval
}
@@ -343,6 +457,17 @@ define {<vscale x 8 x i64>, <vscale x 8 x i64>} @vector_deinterleave_nxv8i64_nxv
; ZIP-NEXT: vmv8r.v v8, v24
; ZIP-NEXT: vmv8r.v v16, v0
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_nxv8i64_nxv16i64:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; ZVZIP-NEXT: vmv8r.v v24, v8
+; ZVZIP-NEXT: vunzipe.v v8, v24
+; ZVZIP-NEXT: vunzipo.v v0, v24
+; ZVZIP-NEXT: vunzipe.v v12, v16
+; ZVZIP-NEXT: vunzipo.v v4, v16
+; ZVZIP-NEXT: vmv8r.v v16, v0
+; ZVZIP-NEXT: ret
%retval = call {<vscale x 8 x i64>, <vscale x 8 x i64>} @llvm.vector.deinterleave2.nxv16i64(<vscale x 16 x i64> %vec)
ret {<vscale x 8 x i64>, <vscale x 8 x i64>} %retval
}
@@ -1592,6 +1717,14 @@ define {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>} @vector_deinterleave_nxv2b
; ZIP-NEXT: ri.vunzip2b.vv v9, v8, v11
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_nxv2bf16_nxv4bf16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVZIP-NEXT: vunzipe.v v10, v8
+; ZVZIP-NEXT: vunzipo.v v9, v8
+; ZVZIP-NEXT: vmv1r.v v8, v10
+; ZVZIP-NEXT: ret
%retval = call {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>} @llvm.vector.deinterleave2.nxv4bf16(<vscale x 4 x bfloat> %vec)
ret {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>} %retval
}
@@ -1612,6 +1745,14 @@ define {<vscale x 2 x half>, <vscale x 2 x half>} @vector_deinterleave_nxv2f16_n
; ZIP-NEXT: ri.vunzip2b.vv v9, v8, v11
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_nxv2f16_nxv4f16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVZIP-NEXT: vunzipe.v v10, v8
+; ZVZIP-NEXT: vunzipo.v v9, v8
+; ZVZIP-NEXT: vmv1r.v v8, v10
+; ZVZIP-NEXT: ret
%retval = call {<vscale x 2 x half>, <vscale x 2 x half>} @llvm.vector.deinterleave2.nxv4f16(<vscale x 4 x half> %vec)
ret {<vscale x 2 x half>, <vscale x 2 x half>} %retval
}
@@ -1634,6 +1775,15 @@ define {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} @vector_deinterleave_nxv4b
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: vmv.v.v v9, v11
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_nxv4bf16_nxv8bf16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVZIP-NEXT: vunzipe.v v10, v8
+; ZVZIP-NEXT: vunzipo.v v11, v8
+; ZVZIP-NEXT: vmv.v.v v8, v10
+; ZVZIP-NEXT: vmv.v.v v9, v11
+; ZVZIP-NEXT: ret
%retval = call {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} @llvm.vector.deinterleave2.nxv8bf16(<vscale x 8 x bfloat> %vec)
ret {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} %retval
}
@@ -1656,6 +1806,15 @@ define {<vscale x 4 x half>, <vscale x 4 x half>} @vector_deinterleave_nxv4f16_n
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: vmv.v.v v9, v11
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_nxv4f16_nxv8f16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVZIP-NEXT: vunzipe.v v10, v8
+; ZVZIP-NEXT: vunzipo.v v11, v8
+; ZVZIP-NEXT: vmv.v.v v8, v10
+; ZVZIP-NEXT: vmv.v.v v9, v11
+; ZVZIP-NEXT: ret
%retval = call {<vscale x 4 x half>, <vscale x 4 x half>} @llvm.vector.deinterleave2.nxv8f16(<vscale x 8 x half> %vec)
ret {<vscale x 4 x half>, <vscale x 4 x half>} %retval
}
@@ -1679,6 +1838,15 @@ define {<vscale x 2 x float>, <vscale x 2 x float>} @vector_deinterleave_nxv2f32
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: vmv.v.v v9, v11
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_nxv2f32_nxv4f32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; ZVZIP-NEXT: vunzipe.v v10, v8
+; ZVZIP-NEXT: vunzipo.v v11, v8
+; ZVZIP-NEXT: vmv.v.v v8, v10
+; ZVZIP-NEXT: vmv.v.v v9, v11
+; ZVZIP-NEXT: ret
%retval = call {<vscale x 2 x float>, <vscale x 2 x float>} @llvm.vector.deinterleave2.nxv4f32(<vscale x 4 x float> %vec)
ret {<vscale x 2 x float>, <vscale x 2 x float>} %retval
}
@@ -1701,6 +1869,15 @@ define {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @vector_deinterleave_nxv8b
; ZIP-NEXT: vmv.v.v v8, v12
; ZIP-NEXT: vmv.v.v v10, v14
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_nxv8bf16_nxv16bf16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVZIP-NEXT: vunzipe.v v12, v8
+; ZVZIP-NEXT: vunzipo.v v14, v8
+; ZVZIP-NEXT: vmv.v.v v8, v12
+; ZVZIP-NEXT: vmv.v.v v10, v14
+; ZVZIP-NEXT: ret
%retval = call {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @llvm.vector.deinterleave2.nxv16bf16(<vscale x 16 x bfloat> %vec)
ret {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} %retval
}
@@ -1723,6 +1900,15 @@ define {<vscale x 8 x half>, <vscale x 8 x half>} @vector_deinterleave_nxv8f16_n
; ZIP-NEXT: vmv.v.v v8, v12
; ZIP-NEXT: vmv.v.v v10, v14
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_nxv8f16_nxv16f16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVZIP-NEXT: vunzipe.v v12, v8
+; ZVZIP-NEXT: vunzipo.v v14, v8
+; ZVZIP-NEXT: vmv.v.v v8, v12
+; ZVZIP-NEXT: vmv.v.v v10, v14
+; ZVZIP-NEXT: ret
%retval = call {<vscale x 8 x half>, <vscale x 8 x half>} @llvm.vector.deinterleave2.nxv16f16(<vscale x 16 x half> %vec)
ret {<vscale x 8 x half>, <vscale x 8 x half>} %retval
}
@@ -1746,6 +1932,15 @@ define {<vscale x 4 x float>, <vscale x 4 x float>} @vector_deinterleave_nxv4f32
; ZIP-NEXT: vmv.v.v v8, v12
; ZIP-NEXT: vmv.v.v v10, v14
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_nxv4f32_nxv8f32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; ZVZIP-NEXT: vunzipe.v v12, v8
+; ZVZIP-NEXT: vunzipo.v v14, v8
+; ZVZIP-NEXT: vmv.v.v v8, v12
+; ZVZIP-NEXT: vmv.v.v v10, v14
+; ZVZIP-NEXT: ret
%retval = call {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float> %vec)
ret {<vscale x 4 x float>, <vscale x 4 x float>} %retval
}
@@ -1773,6 +1968,15 @@ define {<vscale x 2 x double>, <vscale x 2 x double>} @vector_deinterleave_nxv2f
; ZIP-NEXT: vmv.v.v v8, v12
; ZIP-NEXT: vmv.v.v v10, v14
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_nxv2f64_nxv4f64:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; ZVZIP-NEXT: vunzipe.v v12, v8
+; ZVZIP-NEXT: vunzipo.v v14, v8
+; ZVZIP-NEXT: vmv.v.v v8, v12
+; ZVZIP-NEXT: vmv.v.v v10, v14
+; ZVZIP-NEXT: ret
%retval = call {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %vec)
ret {<vscale x 2 x double>, <vscale x 2 x double>} %retval
}
@@ -1800,6 +2004,17 @@ define {<vscale x 32 x bfloat>, <vscale x 32 x bfloat>} @vector_deinterleave_nxv
; ZIP-NEXT: vmv8r.v v8, v24
; ZIP-NEXT: vmv8r.v v16, v0
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_nxv32bf16_nxv64bf16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVZIP-NEXT: vmv8r.v v24, v8
+; ZVZIP-NEXT: vunzipe.v v8, v24
+; ZVZIP-NEXT: vunzipo.v v0, v24
+; ZVZIP-NEXT: vunzipe.v v12, v16
+; ZVZIP-NEXT: vunzipo.v v4, v16
+; ZVZIP-NEXT: vmv8r.v v16, v0
+; ZVZIP-NEXT: ret
%retval = call {<vscale x 32 x bfloat>, <vscale x 32 x bfloat>} @llvm.vector.deinterleave2.nxv64bf16(<vscale x 64 x bfloat> %vec)
ret {<vscale x 32 x bfloat>, <vscale x 32 x bfloat>} %retval
}
@@ -1826,6 +2041,17 @@ define {<vscale x 32 x half>, <vscale x 32 x half>} @vector_deinterleave_nxv32f1
; ZIP-NEXT: vmv8r.v v8, v24
; ZIP-NEXT: vmv8r.v v16, v0
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_nxv32f16_nxv64f16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVZIP-NEXT: vmv8r.v v24, v8
+; ZVZIP-NEXT: vunzipe.v v8, v24
+; ZVZIP-NEXT: vunzipo.v v0, v24
+; ZVZIP-NEXT: vunzipe.v v12, v16
+; ZVZIP-NEXT: vunzipo.v v4, v16
+; ZVZIP-NEXT: vmv8r.v v16, v0
+; ZVZIP-NEXT: ret
%retval = call {<vscale x 32 x half>, <vscale x 32 x half>} @llvm.vector.deinterleave2.nxv64f16(<vscale x 64 x half> %vec)
ret {<vscale x 32 x half>, <vscale x 32 x half>} %retval
}
@@ -1853,6 +2079,17 @@ define {<vscale x 16 x float>, <vscale x 16 x float>} @vector_deinterleave_nxv16
; ZIP-NEXT: vmv8r.v v8, v24
; ZIP-NEXT: vmv8r.v v16, v0
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_nxv16f32_nxv32f32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; ZVZIP-NEXT: vmv8r.v v24, v8
+; ZVZIP-NEXT: vunzipe.v v8, v24
+; ZVZIP-NEXT: vunzipo.v v0, v24
+; ZVZIP-NEXT: vunzipe.v v12, v16
+; ZVZIP-NEXT: vunzipo.v v4, v16
+; ZVZIP-NEXT: vmv8r.v v16, v0
+; ZVZIP-NEXT: ret
%retval = call {<vscale x 16 x float>, <vscale x 16 x float>} @llvm.vector.deinterleave2.nxv32f32(<vscale x 32 x float> %vec)
ret {<vscale x 16 x float>, <vscale x 16 x float>} %retval
}
@@ -1914,6 +2151,17 @@ define {<vscale x 8 x double>, <vscale x 8 x double>} @vector_deinterleave_nxv8f
; ZIP-NEXT: vmv8r.v v8, v24
; ZIP-NEXT: vmv8r.v v16, v0
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_nxv8f64_nxv16f64:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; ZVZIP-NEXT: vmv8r.v v24, v8
+; ZVZIP-NEXT: vunzipe.v v8, v24
+; ZVZIP-NEXT: vunzipo.v v0, v24
+; ZVZIP-NEXT: vunzipe.v v12, v16
+; ZVZIP-NEXT: vunzipo.v v4, v16
+; ZVZIP-NEXT: vmv8r.v v16, v0
+; ZVZIP-NEXT: ret
%retval = call {<vscale x 8 x double>, <vscale x 8 x double>} @llvm.vector.deinterleave2.nxv16f64(<vscale x 16 x double> %vec)
ret {<vscale x 8 x double>, <vscale x 8 x double>} %retval
}
@@ -3698,6 +3946,13 @@ define <vscale x 16 x i8> @vector_deinterleave_nxv16i8_nxv32i8_oneactive(<vscale
; ZIP-NEXT: ri.vunzip2a.vv v12, v8, v10
; ZIP-NEXT: vmv.v.v v8, v12
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_deinterleave_nxv16i8_nxv32i8_oneactive:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; ZVZIP-NEXT: vunzipe.v v12, v8
+; ZVZIP-NEXT: vmv.v.v v8, v12
+; ZVZIP-NEXT: ret
%retval = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %vec)
%ext = extractvalue {<vscale x 16 x i8>, <vscale x 16 x i8>} %retval, 0
ret <vscale x 16 x i8> %ext
@@ -3771,6 +4026,17 @@ define { <8 x float>, <8 x float> } @interleave_deinterleave2(<8 x float> %a, <8
; ZIP-NEXT: vnsrl.wx v10, v16, a0
; ZIP-NEXT: vnsrl.wi v8, v16, 0
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: interleave_deinterleave2:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; ZVZIP-NEXT: vmv2r.v v12, v10
+; ZVZIP-NEXT: li a0, 32
+; ZVZIP-NEXT: vzip.vv v16, v8, v12
+; ZVZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZVZIP-NEXT: vnsrl.wx v10, v16, a0
+; ZVZIP-NEXT: vnsrl.wi v8, v16, 0
+; ZVZIP-NEXT: ret
entry:
%0 = call <16 x float> @llvm.vector.interleave2.v16f32(<8 x float> %a, <8 x float> %b)
%1 = call { <8 x float>, <8 x float> } @llvm.vector.deinterleave2.v16f32(<16 x float> %0)
@@ -3798,6 +4064,16 @@ define <16 x float> @deinterleave_interleave2(<16 x float> %arg) {
; ZIP-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; ZIP-NEXT: ri.vzip2a.vv v8, v12, v16
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: deinterleave_interleave2:
+; ZVZIP: # %bb.0: # %entry
+; ZVZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZVZIP-NEXT: vnsrl.wi v16, v8, 0
+; ZVZIP-NEXT: li a0, 32
+; ZVZIP-NEXT: vnsrl.wx v20, v8, a0
+; ZVZIP-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; ZVZIP-NEXT: vzip.vv v8, v16, v20
+; ZVZIP-NEXT: ret
entry:
%0 = call { <8 x float>, <8 x float> } @llvm.vector.deinterleave2.v16f32(<16 x float> %arg)
%a = extractvalue { <8 x float>, <8 x float> } %0, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
index 38d38f78c6054..91819fb1b5efc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
@@ -5,6 +5,8 @@
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvbb,+zvfh | FileCheck %s --check-prefix=ZVBB
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvbb,+zvfh,+experimental-xrivosvizip | FileCheck %s --check-prefix=ZIP
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvbb,+zvfh,+experimental-xrivosvizip | FileCheck %s --check-prefix=ZIP
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvbb,+zvfh,+experimental-zvzip | FileCheck %s --check-prefix=ZVZIP
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvbb,+zvfh,+experimental-zvzip | FileCheck %s --check-prefix=ZVZIP
; Integers
@@ -58,6 +60,21 @@ define <32 x i1> @vector_interleave_v32i1_v16i1(<16 x i1> %a, <16 x i1> %b) {
; ZIP-NEXT: ri.vzip2a.vv v12, v8, v10
; ZIP-NEXT: vmsne.vi v0, v12, 0
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_v32i1_v16i1:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: li a0, 32
+; ZVZIP-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZVZIP-NEXT: vslideup.vi v0, v8, 2
+; ZVZIP-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; ZVZIP-NEXT: vmv.v.i v8, 0
+; ZVZIP-NEXT: vmerge.vim v12, v8, 1, v0
+; ZVZIP-NEXT: vsetivli zero, 16, e8, m2, ta, ma
+; ZVZIP-NEXT: vslidedown.vi v14, v12, 16
+; ZVZIP-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; ZVZIP-NEXT: vzip.vv v8, v12, v14
+; ZVZIP-NEXT: vmsne.vi v0, v8, 0
+; ZVZIP-NEXT: ret
%res = call <32 x i1> @llvm.vector.interleave2.v32i1(<16 x i1> %a, <16 x i1> %b)
ret <32 x i1> %res
}
@@ -89,6 +106,14 @@ define <16 x i16> @vector_interleave_v16i16_v8i16(<8 x i16> %a, <8 x i16> %b) {
; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_v16i16_v8i16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVZIP-NEXT: vmv1r.v v14, v9
+; ZVZIP-NEXT: vmv1r.v v12, v8
+; ZVZIP-NEXT: vzip.vv v8, v12, v14
+; ZVZIP-NEXT: ret
%res = call <16 x i16> @llvm.vector.interleave2.v16i16(<8 x i16> %a, <8 x i16> %b)
ret <16 x i16> %res
}
@@ -121,6 +146,14 @@ define <8 x i32> @vector_interleave_v8i32_v4i32(<4 x i32> %a, <4 x i32> %b) {
; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_v8i32_v4i32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZVZIP-NEXT: vmv1r.v v14, v9
+; ZVZIP-NEXT: vmv1r.v v12, v8
+; ZVZIP-NEXT: vzip.vv v8, v12, v14
+; ZVZIP-NEXT: ret
%res = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %a, <4 x i32> %b)
ret <8 x i32> %res
}
@@ -163,6 +196,14 @@ define <4 x i64> @vector_interleave_v4i64_v2i64(<2 x i64> %a, <2 x i64> %b) {
; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_v4i64_v2i64:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZVZIP-NEXT: vmv1r.v v14, v9
+; ZVZIP-NEXT: vmv1r.v v12, v8
+; ZVZIP-NEXT: vzip.vv v8, v12, v14
+; ZVZIP-NEXT: ret
%res = call <4 x i64> @llvm.vector.interleave2.v4i64(<2 x i64> %a, <2 x i64> %b)
ret <4 x i64> %res
}
@@ -245,6 +286,32 @@ define <6 x i32> @vector_interleave3_v6i32_v2i32(<2 x i32> %a, <2 x i32> %b, <2
; ZIP-NEXT: add sp, sp, a0
; ZIP-NEXT: addi sp, sp, 16
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave3_v6i32_v2i32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -16
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 1
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: addi a0, sp, 16
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: srli a1, a1, 1
+; ZVZIP-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
+; ZVZIP-NEXT: vsseg3e32.v v8, (a0)
+; ZVZIP-NEXT: add a2, a0, a1
+; ZVZIP-NEXT: vle32.v v9, (a2)
+; ZVZIP-NEXT: vle32.v v8, (a0)
+; ZVZIP-NEXT: add a1, a2, a1
+; ZVZIP-NEXT: vle32.v v10, (a1)
+; ZVZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVZIP-NEXT: vslideup.vi v8, v9, 2
+; ZVZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZVZIP-NEXT: vslideup.vi v8, v10, 4
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 1
+; ZVZIP-NEXT: add sp, sp, a0
+; ZVZIP-NEXT: addi sp, sp, 16
+; ZVZIP-NEXT: ret
%res = call <6 x i32> @llvm.vector.interleave3.v6i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c)
ret <6 x i32> %res
}
@@ -336,6 +403,35 @@ define <8 x i32> @vector_interleave4_v8i32_v2i32(<2 x i32> %a, <2 x i32> %b, <2
; ZIP-NEXT: add sp, sp, a0
; ZIP-NEXT: addi sp, sp, 16
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave4_v8i32_v2i32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -16
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 1
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: addi a0, sp, 16
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: srli a1, a1, 1
+; ZVZIP-NEXT: add a2, a0, a1
+; ZVZIP-NEXT: vsetvli a3, zero, e32, mf2, ta, ma
+; ZVZIP-NEXT: vsseg4e32.v v8, (a0)
+; ZVZIP-NEXT: add a3, a2, a1
+; ZVZIP-NEXT: add a1, a3, a1
+; ZVZIP-NEXT: vle32.v v10, (a3)
+; ZVZIP-NEXT: vle32.v v9, (a2)
+; ZVZIP-NEXT: vle32.v v11, (a1)
+; ZVZIP-NEXT: vle32.v v8, (a0)
+; ZVZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVZIP-NEXT: vslideup.vi v10, v11, 2
+; ZVZIP-NEXT: vslideup.vi v8, v9, 2
+; ZVZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZVZIP-NEXT: vslideup.vi v8, v10, 4
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 1
+; ZVZIP-NEXT: add sp, sp, a0
+; ZVZIP-NEXT: addi sp, sp, 16
+; ZVZIP-NEXT: ret
%res = call <8 x i32> @llvm.vector.interleave4.v8i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d)
ret <8 x i32> %res
}
@@ -439,6 +535,39 @@ define <10 x i16> @vector_interleave5_v10i16_v2i16(<2 x i16> %a, <2 x i16> %b, <
; ZIP-NEXT: add sp, sp, a0
; ZIP-NEXT: addi sp, sp, 16
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave5_v10i16_v2i16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -16
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 1
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: addi a0, sp, 16
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: srli a1, a1, 2
+; ZVZIP-NEXT: add a2, a0, a1
+; ZVZIP-NEXT: add a3, a2, a1
+; ZVZIP-NEXT: vsetvli a4, zero, e16, mf4, ta, ma
+; ZVZIP-NEXT: vsseg5e16.v v8, (a0)
+; ZVZIP-NEXT: add a4, a3, a1
+; ZVZIP-NEXT: vle16.v v9, (a2)
+; ZVZIP-NEXT: vle16.v v11, (a4)
+; ZVZIP-NEXT: vle16.v v12, (a3)
+; ZVZIP-NEXT: vle16.v v8, (a0)
+; ZVZIP-NEXT: add a1, a4, a1
+; ZVZIP-NEXT: vle16.v v10, (a1)
+; ZVZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVZIP-NEXT: vslideup.vi v12, v11, 2
+; ZVZIP-NEXT: vslideup.vi v8, v9, 2
+; ZVZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVZIP-NEXT: vslideup.vi v8, v12, 4
+; ZVZIP-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVZIP-NEXT: vslideup.vi v8, v10, 8
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 1
+; ZVZIP-NEXT: add sp, sp, a0
+; ZVZIP-NEXT: addi sp, sp, 16
+; ZVZIP-NEXT: ret
%res = call <10 x i16> @llvm.vector.interleave5.v10i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i16> %d, <2 x i16> %e)
ret <10 x i16> %res
}
@@ -551,6 +680,42 @@ define <12 x i16> @vector_interleave6_v12i16_v2i16(<2 x i16> %a, <2 x i16> %b, <
; ZIP-NEXT: add sp, sp, a0
; ZIP-NEXT: addi sp, sp, 16
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave6_v12i16_v2i16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -16
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 1
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: addi a0, sp, 16
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: srli a1, a1, 2
+; ZVZIP-NEXT: add a2, a0, a1
+; ZVZIP-NEXT: add a3, a2, a1
+; ZVZIP-NEXT: vsetvli a4, zero, e16, mf4, ta, ma
+; ZVZIP-NEXT: vsseg6e16.v v8, (a0)
+; ZVZIP-NEXT: vle16.v v9, (a2)
+; ZVZIP-NEXT: add a2, a3, a1
+; ZVZIP-NEXT: vle16.v v11, (a2)
+; ZVZIP-NEXT: add a2, a2, a1
+; ZVZIP-NEXT: vle16.v v12, (a3)
+; ZVZIP-NEXT: add a1, a2, a1
+; ZVZIP-NEXT: vle16.v v10, (a2)
+; ZVZIP-NEXT: vle16.v v8, (a0)
+; ZVZIP-NEXT: vle16.v v13, (a1)
+; ZVZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVZIP-NEXT: vslideup.vi v12, v11, 2
+; ZVZIP-NEXT: vslideup.vi v8, v9, 2
+; ZVZIP-NEXT: vslideup.vi v10, v13, 2
+; ZVZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVZIP-NEXT: vslideup.vi v8, v12, 4
+; ZVZIP-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVZIP-NEXT: vslideup.vi v8, v10, 8
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 1
+; ZVZIP-NEXT: add sp, sp, a0
+; ZVZIP-NEXT: addi sp, sp, 16
+; ZVZIP-NEXT: ret
%res = call <12 x i16> @llvm.vector.interleave6.v12i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i16> %d, <2 x i16> %e, <2 x i16> %f)
ret <12 x i16> %res
}
@@ -669,6 +834,44 @@ define <14 x i8> @vector_interleave7_v14i8_v2i8(<2 x i8> %a, <2 x i8> %b, <2 x i
; ZIP-NEXT: add sp, sp, a0
; ZIP-NEXT: addi sp, sp, 16
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave7_v14i8_v2i8:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -16
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: addi a0, sp, 16
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: srli a1, a1, 3
+; ZVZIP-NEXT: add a2, a0, a1
+; ZVZIP-NEXT: add a3, a2, a1
+; ZVZIP-NEXT: add a4, a3, a1
+; ZVZIP-NEXT: vsetvli a5, zero, e8, mf8, ta, ma
+; ZVZIP-NEXT: vsseg7e8.v v8, (a0)
+; ZVZIP-NEXT: vle8.v v9, (a4)
+; ZVZIP-NEXT: add a4, a4, a1
+; ZVZIP-NEXT: vle8.v v10, (a2)
+; ZVZIP-NEXT: add a2, a4, a1
+; ZVZIP-NEXT: add a1, a2, a1
+; ZVZIP-NEXT: vle8.v v11, (a2)
+; ZVZIP-NEXT: vle8.v v12, (a4)
+; ZVZIP-NEXT: vle8.v v8, (a0)
+; ZVZIP-NEXT: vle8.v v13, (a1)
+; ZVZIP-NEXT: vle8.v v14, (a3)
+; ZVZIP-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
+; ZVZIP-NEXT: vslideup.vi v12, v11, 2
+; ZVZIP-NEXT: vslideup.vi v8, v10, 2
+; ZVZIP-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
+; ZVZIP-NEXT: vslideup.vi v12, v13, 4
+; ZVZIP-NEXT: vslideup.vi v8, v14, 4
+; ZVZIP-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; ZVZIP-NEXT: vslideup.vi v8, v9, 6
+; ZVZIP-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; ZVZIP-NEXT: vslideup.vi v8, v12, 8
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: add sp, sp, a0
+; ZVZIP-NEXT: addi sp, sp, 16
+; ZVZIP-NEXT: ret
%res = call <14 x i8> @llvm.vector.interleave7.v14i8(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d, <2 x i8> %e, <2 x i8> %f, <2 x i8> %g)
ret <14 x i8> %res
}
@@ -796,6 +999,47 @@ define <16 x i8> @vector_interleave8_v16i8_v2i8(<2 x i8> %a, <2 x i8> %b, <2 x i
; ZIP-NEXT: add sp, sp, a0
; ZIP-NEXT: addi sp, sp, 16
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave8_v16i8_v2i8:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -16
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: addi a0, sp, 16
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: srli a1, a1, 3
+; ZVZIP-NEXT: add a2, a0, a1
+; ZVZIP-NEXT: add a3, a2, a1
+; ZVZIP-NEXT: add a4, a3, a1
+; ZVZIP-NEXT: add a5, a4, a1
+; ZVZIP-NEXT: add a6, a5, a1
+; ZVZIP-NEXT: vsetvli a7, zero, e8, mf8, ta, ma
+; ZVZIP-NEXT: vsseg8e8.v v8, (a0)
+; ZVZIP-NEXT: vle8.v v9, (a6)
+; ZVZIP-NEXT: add a6, a6, a1
+; ZVZIP-NEXT: vle8.v v10, (a5)
+; ZVZIP-NEXT: vle8.v v11, (a6)
+; ZVZIP-NEXT: add a1, a6, a1
+; ZVZIP-NEXT: vle8.v v12, (a2)
+; ZVZIP-NEXT: vle8.v v8, (a0)
+; ZVZIP-NEXT: vle8.v v13, (a3)
+; ZVZIP-NEXT: vle8.v v14, (a4)
+; ZVZIP-NEXT: vle8.v v15, (a1)
+; ZVZIP-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
+; ZVZIP-NEXT: vslideup.vi v10, v9, 2
+; ZVZIP-NEXT: vslideup.vi v8, v12, 2
+; ZVZIP-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
+; ZVZIP-NEXT: vslideup.vi v10, v11, 4
+; ZVZIP-NEXT: vslideup.vi v8, v13, 4
+; ZVZIP-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; ZVZIP-NEXT: vslideup.vi v10, v15, 6
+; ZVZIP-NEXT: vslideup.vi v8, v14, 6
+; ZVZIP-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; ZVZIP-NEXT: vslideup.vi v8, v10, 8
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: add sp, sp, a0
+; ZVZIP-NEXT: addi sp, sp, 16
+; ZVZIP-NEXT: ret
%res = call <16 x i8> @llvm.vector.interleave8.v16i8(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d, <2 x i8> %e, <2 x i8> %f, <2 x i8> %g, <2 x i8> %h)
ret <16 x i8> %res
}
@@ -826,6 +1070,13 @@ define <4 x half> @vector_interleave_v4f16_v2f16(<2 x half> %a, <2 x half> %b) {
; ZIP-NEXT: ri.vzip2a.vv v10, v8, v9
; ZIP-NEXT: vmv1r.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_v4f16_v2f16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVZIP-NEXT: vzip.vv v10, v8, v9
+; ZVZIP-NEXT: vmv1r.v v8, v10
+; ZVZIP-NEXT: ret
%res = call <4 x half> @llvm.vector.interleave2.v4f16(<2 x half> %a, <2 x half> %b)
ret <4 x half> %res
}
@@ -854,6 +1105,14 @@ define <8 x half> @vector_interleave_v8f16_v4f16(<4 x half> %a, <4 x half> %b) {
; ZIP-NEXT: ri.vzip2a.vv v10, v8, v9
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_v8f16_v4f16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVZIP-NEXT: vmv1r.v v10, v9
+; ZVZIP-NEXT: vmv1r.v v11, v8
+; ZVZIP-NEXT: vzip.vv v8, v11, v10
+; ZVZIP-NEXT: ret
%res = call <8 x half> @llvm.vector.interleave2.v8f16(<4 x half> %a, <4 x half> %b)
ret <8 x half> %res
}
@@ -883,6 +1142,14 @@ define <4 x float> @vector_interleave_v4f32_v2f32(<2 x float> %a, <2 x float> %b
; ZIP-NEXT: ri.vzip2a.vv v10, v8, v9
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_v4f32_v2f32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVZIP-NEXT: vmv1r.v v10, v9
+; ZVZIP-NEXT: vmv1r.v v11, v8
+; ZVZIP-NEXT: vzip.vv v8, v11, v10
+; ZVZIP-NEXT: ret
%res = call <4 x float> @llvm.vector.interleave2.v4f32(<2 x float> %a, <2 x float> %b)
ret <4 x float> %res
}
@@ -914,6 +1181,14 @@ define <16 x half> @vector_interleave_v16f16_v8f16(<8 x half> %a, <8 x half> %b)
; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_v16f16_v8f16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVZIP-NEXT: vmv1r.v v14, v9
+; ZVZIP-NEXT: vmv1r.v v12, v8
+; ZVZIP-NEXT: vzip.vv v8, v12, v14
+; ZVZIP-NEXT: ret
%res = call <16 x half> @llvm.vector.interleave2.v16f16(<8 x half> %a, <8 x half> %b)
ret <16 x half> %res
}
@@ -946,6 +1221,14 @@ define <8 x float> @vector_interleave_v8f32_v4f32(<4 x float> %a, <4 x float> %b
; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_v8f32_v4f32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZVZIP-NEXT: vmv1r.v v14, v9
+; ZVZIP-NEXT: vmv1r.v v12, v8
+; ZVZIP-NEXT: vzip.vv v8, v12, v14
+; ZVZIP-NEXT: ret
%res = call <8 x float> @llvm.vector.interleave2.v8f32(<4 x float> %a, <4 x float> %b)
ret <8 x float> %res
}
@@ -988,6 +1271,14 @@ define <4 x double> @vector_interleave_v4f64_v2f64(<2 x double> %a, <2 x double>
; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_v4f64_v2f64:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZVZIP-NEXT: vmv1r.v v14, v9
+; ZVZIP-NEXT: vmv1r.v v12, v8
+; ZVZIP-NEXT: vzip.vv v8, v12, v14
+; ZVZIP-NEXT: ret
%res = call <4 x double> @llvm.vector.interleave2.v4f64(<2 x double> %a, <2 x double> %b)
ret <4 x double> %res
}
@@ -1070,6 +1361,32 @@ define <6 x float> @vector_interleave3_v6f32_v2f32(<2 x float> %a, <2 x float> %
; ZIP-NEXT: add sp, sp, a0
; ZIP-NEXT: addi sp, sp, 16
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave3_v6f32_v2f32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -16
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 1
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: addi a0, sp, 16
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: srli a1, a1, 1
+; ZVZIP-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
+; ZVZIP-NEXT: vsseg3e32.v v8, (a0)
+; ZVZIP-NEXT: add a2, a0, a1
+; ZVZIP-NEXT: vle32.v v9, (a2)
+; ZVZIP-NEXT: vle32.v v8, (a0)
+; ZVZIP-NEXT: add a1, a2, a1
+; ZVZIP-NEXT: vle32.v v10, (a1)
+; ZVZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVZIP-NEXT: vslideup.vi v8, v9, 2
+; ZVZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZVZIP-NEXT: vslideup.vi v8, v10, 4
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 1
+; ZVZIP-NEXT: add sp, sp, a0
+; ZVZIP-NEXT: addi sp, sp, 16
+; ZVZIP-NEXT: ret
%res = call <6 x float> @llvm.vector.interleave3.v6f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
ret <6 x float> %res
}
@@ -1161,6 +1478,35 @@ define <8 x float> @vector_interleave4_v8f32_v2f32(<2 x float> %a, <2 x float> %
; ZIP-NEXT: add sp, sp, a0
; ZIP-NEXT: addi sp, sp, 16
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave4_v8f32_v2f32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -16
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 1
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: addi a0, sp, 16
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: srli a1, a1, 1
+; ZVZIP-NEXT: add a2, a0, a1
+; ZVZIP-NEXT: vsetvli a3, zero, e32, mf2, ta, ma
+; ZVZIP-NEXT: vsseg4e32.v v8, (a0)
+; ZVZIP-NEXT: add a3, a2, a1
+; ZVZIP-NEXT: add a1, a3, a1
+; ZVZIP-NEXT: vle32.v v10, (a3)
+; ZVZIP-NEXT: vle32.v v9, (a2)
+; ZVZIP-NEXT: vle32.v v11, (a1)
+; ZVZIP-NEXT: vle32.v v8, (a0)
+; ZVZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVZIP-NEXT: vslideup.vi v10, v11, 2
+; ZVZIP-NEXT: vslideup.vi v8, v9, 2
+; ZVZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZVZIP-NEXT: vslideup.vi v8, v10, 4
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 1
+; ZVZIP-NEXT: add sp, sp, a0
+; ZVZIP-NEXT: addi sp, sp, 16
+; ZVZIP-NEXT: ret
%res = call <8 x float> @llvm.vector.interleave4.v8f32(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x float> %d)
ret <8 x float> %res
}
@@ -1264,6 +1610,39 @@ define <10 x half> @vector_interleave5_v10f16_v2f16(<2 x half> %a, <2 x half> %b
; ZIP-NEXT: add sp, sp, a0
; ZIP-NEXT: addi sp, sp, 16
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave5_v10f16_v2f16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -16
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 1
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: addi a0, sp, 16
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: srli a1, a1, 2
+; ZVZIP-NEXT: add a2, a0, a1
+; ZVZIP-NEXT: add a3, a2, a1
+; ZVZIP-NEXT: vsetvli a4, zero, e16, mf4, ta, ma
+; ZVZIP-NEXT: vsseg5e16.v v8, (a0)
+; ZVZIP-NEXT: add a4, a3, a1
+; ZVZIP-NEXT: vle16.v v9, (a2)
+; ZVZIP-NEXT: vle16.v v11, (a4)
+; ZVZIP-NEXT: vle16.v v12, (a3)
+; ZVZIP-NEXT: vle16.v v8, (a0)
+; ZVZIP-NEXT: add a1, a4, a1
+; ZVZIP-NEXT: vle16.v v10, (a1)
+; ZVZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVZIP-NEXT: vslideup.vi v12, v11, 2
+; ZVZIP-NEXT: vslideup.vi v8, v9, 2
+; ZVZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVZIP-NEXT: vslideup.vi v8, v12, 4
+; ZVZIP-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVZIP-NEXT: vslideup.vi v8, v10, 8
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 1
+; ZVZIP-NEXT: add sp, sp, a0
+; ZVZIP-NEXT: addi sp, sp, 16
+; ZVZIP-NEXT: ret
%res = call <10 x half> @llvm.vector.interleave5.v10f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x half> %d, <2 x half> %e)
ret <10 x half> %res
}
@@ -1376,6 +1755,42 @@ define <12 x half> @vector_interleave6_v12f16_v2f16(<2 x half> %a, <2 x half> %b
; ZIP-NEXT: add sp, sp, a0
; ZIP-NEXT: addi sp, sp, 16
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave6_v12f16_v2f16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -16
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 1
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: addi a0, sp, 16
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: srli a1, a1, 2
+; ZVZIP-NEXT: add a2, a0, a1
+; ZVZIP-NEXT: add a3, a2, a1
+; ZVZIP-NEXT: vsetvli a4, zero, e16, mf4, ta, ma
+; ZVZIP-NEXT: vsseg6e16.v v8, (a0)
+; ZVZIP-NEXT: vle16.v v9, (a2)
+; ZVZIP-NEXT: add a2, a3, a1
+; ZVZIP-NEXT: vle16.v v11, (a2)
+; ZVZIP-NEXT: add a2, a2, a1
+; ZVZIP-NEXT: vle16.v v12, (a3)
+; ZVZIP-NEXT: add a1, a2, a1
+; ZVZIP-NEXT: vle16.v v10, (a2)
+; ZVZIP-NEXT: vle16.v v8, (a0)
+; ZVZIP-NEXT: vle16.v v13, (a1)
+; ZVZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVZIP-NEXT: vslideup.vi v12, v11, 2
+; ZVZIP-NEXT: vslideup.vi v8, v9, 2
+; ZVZIP-NEXT: vslideup.vi v10, v13, 2
+; ZVZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVZIP-NEXT: vslideup.vi v8, v12, 4
+; ZVZIP-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVZIP-NEXT: vslideup.vi v8, v10, 8
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 1
+; ZVZIP-NEXT: add sp, sp, a0
+; ZVZIP-NEXT: addi sp, sp, 16
+; ZVZIP-NEXT: ret
%res = call <12 x half> @llvm.vector.interleave6.v12f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x half> %d, <2 x half> %e, <2 x half> %f)
ret <12 x half> %res
}
@@ -1500,6 +1915,46 @@ define <7 x half> @vector_interleave7_v7f16_v1f16(<1 x half> %a, <1 x half> %b,
; ZIP-NEXT: add sp, sp, a0
; ZIP-NEXT: addi sp, sp, 16
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave7_v7f16_v1f16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -16
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 1
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: addi a0, sp, 16
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: srli a1, a1, 2
+; ZVZIP-NEXT: add a2, a0, a1
+; ZVZIP-NEXT: add a3, a2, a1
+; ZVZIP-NEXT: add a4, a3, a1
+; ZVZIP-NEXT: vsetvli a5, zero, e16, mf4, ta, ma
+; ZVZIP-NEXT: vsseg7e16.v v8, (a0)
+; ZVZIP-NEXT: vle16.v v9, (a4)
+; ZVZIP-NEXT: add a4, a4, a1
+; ZVZIP-NEXT: vle16.v v10, (a2)
+; ZVZIP-NEXT: add a2, a4, a1
+; ZVZIP-NEXT: add a1, a2, a1
+; ZVZIP-NEXT: vle16.v v11, (a2)
+; ZVZIP-NEXT: vle16.v v12, (a4)
+; ZVZIP-NEXT: vle16.v v8, (a0)
+; ZVZIP-NEXT: vle16.v v13, (a1)
+; ZVZIP-NEXT: vle16.v v14, (a3)
+; ZVZIP-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
+; ZVZIP-NEXT: vslideup.vi v12, v11, 1
+; ZVZIP-NEXT: vslideup.vi v8, v10, 1
+; ZVZIP-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
+; ZVZIP-NEXT: vslideup.vi v12, v13, 2
+; ZVZIP-NEXT: vslideup.vi v8, v14, 2
+; ZVZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVZIP-NEXT: vslideup.vi v8, v9, 3
+; ZVZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVZIP-NEXT: vslideup.vi v8, v12, 4
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 1
+; ZVZIP-NEXT: add sp, sp, a0
+; ZVZIP-NEXT: addi sp, sp, 16
+; ZVZIP-NEXT: ret
%res = call <7 x half> @llvm.vector.interleave7.v7f16(<1 x half> %a, <1 x half> %b, <1 x half> %c, <1 x half> %d, <1 x half> %e, <1 x half> %f, <1 x half> %g)
ret <7 x half> %res
}
@@ -1633,6 +2088,49 @@ define <8 x half> @vector_interleave8_v8f16_v1f16(<1 x half> %a, <1 x half> %b,
; ZIP-NEXT: add sp, sp, a0
; ZIP-NEXT: addi sp, sp, 16
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave8_v8f16_v1f16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -16
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 1
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: addi a0, sp, 16
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: srli a1, a1, 2
+; ZVZIP-NEXT: add a2, a0, a1
+; ZVZIP-NEXT: add a3, a2, a1
+; ZVZIP-NEXT: add a4, a3, a1
+; ZVZIP-NEXT: add a5, a4, a1
+; ZVZIP-NEXT: add a6, a5, a1
+; ZVZIP-NEXT: vsetvli a7, zero, e16, mf4, ta, ma
+; ZVZIP-NEXT: vsseg8e16.v v8, (a0)
+; ZVZIP-NEXT: vle16.v v9, (a6)
+; ZVZIP-NEXT: add a6, a6, a1
+; ZVZIP-NEXT: vle16.v v10, (a5)
+; ZVZIP-NEXT: vle16.v v11, (a6)
+; ZVZIP-NEXT: add a1, a6, a1
+; ZVZIP-NEXT: vle16.v v12, (a2)
+; ZVZIP-NEXT: vle16.v v8, (a0)
+; ZVZIP-NEXT: vle16.v v13, (a3)
+; ZVZIP-NEXT: vle16.v v14, (a4)
+; ZVZIP-NEXT: vle16.v v15, (a1)
+; ZVZIP-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
+; ZVZIP-NEXT: vslideup.vi v10, v9, 1
+; ZVZIP-NEXT: vslideup.vi v8, v12, 1
+; ZVZIP-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
+; ZVZIP-NEXT: vslideup.vi v10, v11, 2
+; ZVZIP-NEXT: vslideup.vi v8, v13, 2
+; ZVZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVZIP-NEXT: vslideup.vi v10, v15, 3
+; ZVZIP-NEXT: vslideup.vi v8, v14, 3
+; ZVZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVZIP-NEXT: vslideup.vi v8, v10, 4
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 1
+; ZVZIP-NEXT: add sp, sp, a0
+; ZVZIP-NEXT: addi sp, sp, 16
+; ZVZIP-NEXT: ret
%res = call <8 x half> @llvm.vector.interleave8.v8f16(<1 x half> %a, <1 x half> %b, <1 x half> %c, <1 x half> %d, <1 x half> %e, <1 x half> %f, <1 x half> %g, <1 x half> %h)
ret <8 x half> %res
}
@@ -1655,6 +2153,12 @@ define <8 x i16> @interleave4_const_splat_v8i16(<2 x i16> %a) {
; ZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZIP-NEXT: vmv.v.i v8, 3
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: interleave4_const_splat_v8i16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVZIP-NEXT: vmv.v.i v8, 3
+; ZVZIP-NEXT: ret
%retval = call <8 x i16> @llvm.vector.interleave4.v8i16(<2 x i16> splat(i16 3), <2 x i16> splat(i16 3), <2 x i16> splat(i16 3), <2 x i16> splat(i16 3))
ret <8 x i16> %retval
}
@@ -1677,6 +2181,12 @@ define <8 x i16> @interleave4_same_nonconst_splat_v8i16(i16 %a) {
; ZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZIP-NEXT: vmv.v.x v8, a0
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: interleave4_same_nonconst_splat_v8i16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVZIP-NEXT: vmv.v.x v8, a0
+; ZVZIP-NEXT: ret
%ins = insertelement <2 x i16> poison, i16 %a, i32 0
%splat = shufflevector <2 x i16> %ins, <2 x i16> poison, <2 x i32> zeroinitializer
%retval = call <8 x i16> @llvm.vector.interleave4.v8i16(<2 x i16> %splat, <2 x i16> %splat, <2 x i16> %splat, <2 x i16> %splat)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
index 0577fb1ff67bb..736217a090c93 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
@@ -6,6 +6,7 @@
; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v,+zvbb,+zvfh,+zvfbfmin | FileCheck %s --check-prefixes=ZVBB,ZVBB-RV32
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+zvbb,+zvfh,+zvfbfmin | FileCheck %s --check-prefixes=ZVBB,ZVBB-RV64
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+zvfhmin,+zvfbfmin,+experimental-xrivosvizip | FileCheck %s --check-prefixes=CHECK,ZIP
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+zvfhmin,+zvfbfmin,+experimental-zvzip | FileCheck %s --check-prefixes=CHECK,ZVZIP
; Integers
@@ -67,6 +68,24 @@ define <vscale x 32 x i1> @vector_interleave_nxv32i1_nxv16i1(<vscale x 16 x i1>
; ZIP-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; ZIP-NEXT: vslideup.vx v0, v8, a0
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv32i1_nxv16i1:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; ZVZIP-NEXT: vmv1r.v v9, v0
+; ZVZIP-NEXT: vmv1r.v v0, v8
+; ZVZIP-NEXT: vmv.v.i v10, 0
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: vmerge.vim v12, v10, 1, v0
+; ZVZIP-NEXT: vmv1r.v v0, v9
+; ZVZIP-NEXT: vmerge.vim v14, v10, 1, v0
+; ZVZIP-NEXT: vzip.vv v8, v14, v12
+; ZVZIP-NEXT: vmsne.vi v12, v10, 0
+; ZVZIP-NEXT: vmsne.vi v0, v8, 0
+; ZVZIP-NEXT: srli a0, a0, 2
+; ZVZIP-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; ZVZIP-NEXT: vslideup.vx v0, v12, a0
+; ZVZIP-NEXT: ret
%res = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
ret <vscale x 32 x i1> %res
}
@@ -99,6 +118,14 @@ define <vscale x 32 x i8> @vector_interleave_nxv32i8_nxv16i8(<vscale x 16 x i8>
; ZIP-NEXT: ri.vzip2b.vv v10, v8, v12
; ZIP-NEXT: ri.vzip2a.vv v8, v14, v12
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv32i8_nxv16i8:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; ZVZIP-NEXT: vmv2r.v v12, v10
+; ZVZIP-NEXT: vmv2r.v v14, v8
+; ZVZIP-NEXT: vzip.vv v8, v14, v12
+; ZVZIP-NEXT: ret
%res = call <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
ret <vscale x 32 x i8> %res
}
@@ -131,6 +158,14 @@ define <vscale x 16 x i16> @vector_interleave_nxv16i16_nxv8i16(<vscale x 8 x i16
; ZIP-NEXT: ri.vzip2b.vv v10, v8, v12
; ZIP-NEXT: ri.vzip2a.vv v8, v14, v12
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv16i16_nxv8i16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVZIP-NEXT: vmv2r.v v12, v10
+; ZVZIP-NEXT: vmv2r.v v14, v8
+; ZVZIP-NEXT: vzip.vv v8, v14, v12
+; ZVZIP-NEXT: ret
%res = call <vscale x 16 x i16> @llvm.vector.interleave2.nxv16i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
ret <vscale x 16 x i16> %res
}
@@ -164,6 +199,14 @@ define <vscale x 8 x i32> @vector_interleave_nxv8i32_nxv4i32(<vscale x 4 x i32>
; ZIP-NEXT: ri.vzip2b.vv v10, v8, v12
; ZIP-NEXT: ri.vzip2a.vv v8, v14, v12
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv8i32_nxv4i32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; ZVZIP-NEXT: vmv2r.v v12, v10
+; ZVZIP-NEXT: vmv2r.v v14, v8
+; ZVZIP-NEXT: vzip.vv v8, v14, v12
+; ZVZIP-NEXT: ret
%res = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
ret <vscale x 8 x i32> %res
}
@@ -207,6 +250,14 @@ define <vscale x 4 x i64> @vector_interleave_nxv4i64_nxv2i64(<vscale x 2 x i64>
; ZIP-NEXT: ri.vzip2b.vv v10, v8, v12
; ZIP-NEXT: ri.vzip2a.vv v8, v14, v12
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv4i64_nxv2i64:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; ZVZIP-NEXT: vmv2r.v v12, v10
+; ZVZIP-NEXT: vmv2r.v v14, v8
+; ZVZIP-NEXT: vzip.vv v8, v14, v12
+; ZVZIP-NEXT: ret
%res = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
ret <vscale x 4 x i64> %res
}
@@ -271,6 +322,24 @@ define <vscale x 128 x i1> @vector_interleave_nxv128i1_nxv64i1(<vscale x 64 x i1
; ZIP-NEXT: vmsne.vi v8, v24, 0
; ZIP-NEXT: vmv1r.v v0, v9
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv128i1_nxv64i1:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; ZVZIP-NEXT: vmv1r.v v9, v0
+; ZVZIP-NEXT: vmv1r.v v0, v8
+; ZVZIP-NEXT: vmv.v.i v24, 0
+; ZVZIP-NEXT: vmerge.vim v16, v24, 1, v0
+; ZVZIP-NEXT: vmv1r.v v0, v9
+; ZVZIP-NEXT: vmerge.vim v8, v24, 1, v0
+; ZVZIP-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; ZVZIP-NEXT: vzip.vv v0, v8, v16
+; ZVZIP-NEXT: vzip.vv v24, v12, v20
+; ZVZIP-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; ZVZIP-NEXT: vmsne.vi v9, v0, 0
+; ZVZIP-NEXT: vmsne.vi v8, v24, 0
+; ZVZIP-NEXT: vmv1r.v v0, v9
+; ZVZIP-NEXT: ret
%res = call <vscale x 128 x i1> @llvm.vector.interleave2.nxv128i1(<vscale x 64 x i1> %a, <vscale x 64 x i1> %b)
ret <vscale x 128 x i1> %res
}
@@ -309,6 +378,15 @@ define <vscale x 128 x i8> @vector_interleave_nxv128i8_nxv64i8(<vscale x 64 x i8
; ZIP-NEXT: vmv8r.v v8, v24
; ZIP-NEXT: vmv8r.v v16, v0
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv128i8_nxv64i8:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; ZVZIP-NEXT: vzip.vv v24, v8, v16
+; ZVZIP-NEXT: vzip.vv v0, v12, v20
+; ZVZIP-NEXT: vmv8r.v v8, v24
+; ZVZIP-NEXT: vmv8r.v v16, v0
+; ZVZIP-NEXT: ret
%res = call <vscale x 128 x i8> @llvm.vector.interleave2.nxv128i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b)
ret <vscale x 128 x i8> %res
}
@@ -347,6 +425,15 @@ define <vscale x 64 x i16> @vector_interleave_nxv64i16_nxv32i16(<vscale x 32 x i
; ZIP-NEXT: vmv8r.v v8, v24
; ZIP-NEXT: vmv8r.v v16, v0
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv64i16_nxv32i16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVZIP-NEXT: vzip.vv v24, v8, v16
+; ZVZIP-NEXT: vzip.vv v0, v12, v20
+; ZVZIP-NEXT: vmv8r.v v8, v24
+; ZVZIP-NEXT: vmv8r.v v16, v0
+; ZVZIP-NEXT: ret
%res = call <vscale x 64 x i16> @llvm.vector.interleave2.nxv64i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b)
ret <vscale x 64 x i16> %res
}
@@ -386,6 +473,15 @@ define <vscale x 32 x i32> @vector_interleave_nxv32i32_nxv16i32(<vscale x 16 x i
; ZIP-NEXT: vmv8r.v v8, v24
; ZIP-NEXT: vmv8r.v v16, v0
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv32i32_nxv16i32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; ZVZIP-NEXT: vzip.vv v24, v8, v16
+; ZVZIP-NEXT: vzip.vv v0, v12, v20
+; ZVZIP-NEXT: vmv8r.v v8, v24
+; ZVZIP-NEXT: vmv8r.v v16, v0
+; ZVZIP-NEXT: ret
%res = call <vscale x 32 x i32> @llvm.vector.interleave2.nxv32i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b)
ret <vscale x 32 x i32> %res
}
@@ -439,6 +535,15 @@ define <vscale x 16 x i64> @vector_interleave_nxv16i64_nxv8i64(<vscale x 8 x i64
; ZIP-NEXT: vmv8r.v v8, v24
; ZIP-NEXT: vmv8r.v v16, v0
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv16i64_nxv8i64:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; ZVZIP-NEXT: vzip.vv v24, v8, v16
+; ZVZIP-NEXT: vzip.vv v0, v12, v20
+; ZVZIP-NEXT: vmv8r.v v8, v24
+; ZVZIP-NEXT: vmv8r.v v16, v0
+; ZVZIP-NEXT: ret
%res = call <vscale x 16 x i64> @llvm.vector.interleave2.nxv16i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b)
ret <vscale x 16 x i64> %res
}
@@ -1566,6 +1671,72 @@ define <vscale x 80 x i8> @vector_interleave_nxv80i8_nxv16i8(<vscale x 16 x i8>
; ZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; ZIP-NEXT: addi sp, sp, 80
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv80i8_nxv16i8:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -80
+; ZVZIP-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: addi s0, sp, 80
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a1, 28
+; ZVZIP-NEXT: mul a0, a0, a1
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: andi sp, sp, -64
+; ZVZIP-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; ZVZIP-NEXT: vmv2r.v v20, v16
+; ZVZIP-NEXT: addi a0, sp, 64
+; ZVZIP-NEXT: vmv2r.v v18, v12
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: slli a2, a1, 2
+; ZVZIP-NEXT: add a1, a2, a1
+; ZVZIP-NEXT: add a1, sp, a1
+; ZVZIP-NEXT: addi a1, a1, 64
+; ZVZIP-NEXT: csrr a2, vlenb
+; ZVZIP-NEXT: vmv2r.v v16, v8
+; ZVZIP-NEXT: vmv2r.v v22, v16
+; ZVZIP-NEXT: vmv2r.v v24, v18
+; ZVZIP-NEXT: vmv1r.v v26, v20
+; ZVZIP-NEXT: add a3, a0, a2
+; ZVZIP-NEXT: vmv1r.v v23, v10
+; ZVZIP-NEXT: add a4, a1, a2
+; ZVZIP-NEXT: add a5, a4, a2
+; ZVZIP-NEXT: vmv1r.v v25, v14
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vmv1r.v v18, v11
+; ZVZIP-NEXT: vsseg5e8.v v22, (a0)
+; ZVZIP-NEXT: vmv1r.v v20, v15
+; ZVZIP-NEXT: vsseg5e8.v v17, (a1)
+; ZVZIP-NEXT: vl1r.v v16, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1r.v v17, (a6)
+; ZVZIP-NEXT: add a6, a3, a2
+; ZVZIP-NEXT: vl1r.v v10, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1r.v v11, (a6)
+; ZVZIP-NEXT: vl1r.v v8, (a0)
+; ZVZIP-NEXT: vl1r.v v9, (a3)
+; ZVZIP-NEXT: vl1r.v v14, (a4)
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a3, 10
+; ZVZIP-NEXT: mul a0, a0, a3
+; ZVZIP-NEXT: add a0, sp, a0
+; ZVZIP-NEXT: addi a0, a0, 64
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1r.v v15, (a5)
+; ZVZIP-NEXT: vl1r.v v12, (a6)
+; ZVZIP-NEXT: vl1r.v v13, (a1)
+; ZVZIP-NEXT: slli a2, a2, 3
+; ZVZIP-NEXT: add a2, a0, a2
+; ZVZIP-NEXT: vs2r.v v16, (a2)
+; ZVZIP-NEXT: vs8r.v v8, (a0)
+; ZVZIP-NEXT: vl8r.v v16, (a2)
+; ZVZIP-NEXT: vl8r.v v8, (a0)
+; ZVZIP-NEXT: addi sp, s0, -80
+; ZVZIP-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: addi sp, sp, 80
+; ZVZIP-NEXT: ret
%res = call <vscale x 80 x i8> @llvm.vector.interleave5.nxv80i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e)
ret <vscale x 80 x i8> %res
}
@@ -1961,6 +2132,72 @@ define <vscale x 20 x i32> @vector_interleave_nxv20i32_nxv4i32(<vscale x 4 x i32
; ZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; ZIP-NEXT: addi sp, sp, 80
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv20i32_nxv4i32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -80
+; ZVZIP-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: addi s0, sp, 80
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a1, 28
+; ZVZIP-NEXT: mul a0, a0, a1
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: andi sp, sp, -64
+; ZVZIP-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; ZVZIP-NEXT: vmv2r.v v20, v16
+; ZVZIP-NEXT: addi a0, sp, 64
+; ZVZIP-NEXT: vmv2r.v v18, v12
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: slli a2, a1, 2
+; ZVZIP-NEXT: add a1, a2, a1
+; ZVZIP-NEXT: add a1, sp, a1
+; ZVZIP-NEXT: addi a1, a1, 64
+; ZVZIP-NEXT: csrr a2, vlenb
+; ZVZIP-NEXT: vmv2r.v v16, v8
+; ZVZIP-NEXT: vmv2r.v v22, v16
+; ZVZIP-NEXT: vmv2r.v v24, v18
+; ZVZIP-NEXT: vmv1r.v v26, v20
+; ZVZIP-NEXT: add a3, a0, a2
+; ZVZIP-NEXT: vmv1r.v v23, v10
+; ZVZIP-NEXT: add a4, a1, a2
+; ZVZIP-NEXT: add a5, a4, a2
+; ZVZIP-NEXT: vmv1r.v v25, v14
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vmv1r.v v18, v11
+; ZVZIP-NEXT: vsseg5e32.v v22, (a0)
+; ZVZIP-NEXT: vmv1r.v v20, v15
+; ZVZIP-NEXT: vsseg5e32.v v17, (a1)
+; ZVZIP-NEXT: vl1re32.v v16, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re32.v v17, (a6)
+; ZVZIP-NEXT: add a6, a3, a2
+; ZVZIP-NEXT: vl1re32.v v10, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re32.v v11, (a6)
+; ZVZIP-NEXT: vl1re32.v v8, (a0)
+; ZVZIP-NEXT: vl1re32.v v9, (a3)
+; ZVZIP-NEXT: vl1re32.v v14, (a4)
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a3, 10
+; ZVZIP-NEXT: mul a0, a0, a3
+; ZVZIP-NEXT: add a0, sp, a0
+; ZVZIP-NEXT: addi a0, a0, 64
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re32.v v15, (a5)
+; ZVZIP-NEXT: vl1re32.v v12, (a6)
+; ZVZIP-NEXT: vl1re32.v v13, (a1)
+; ZVZIP-NEXT: slli a2, a2, 3
+; ZVZIP-NEXT: add a2, a0, a2
+; ZVZIP-NEXT: vs2r.v v16, (a2)
+; ZVZIP-NEXT: vs8r.v v8, (a0)
+; ZVZIP-NEXT: vl8re32.v v16, (a2)
+; ZVZIP-NEXT: vl8re32.v v8, (a0)
+; ZVZIP-NEXT: addi sp, s0, -80
+; ZVZIP-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: addi sp, sp, 80
+; ZVZIP-NEXT: ret
%res = call <vscale x 20 x i32> @llvm.vector.interleave5.nxv20i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e)
ret <vscale x 20 x i32> %res
}
@@ -2297,6 +2534,72 @@ define <vscale x 10 x i64> @vector_interleave_nxv10i64_nxv2i64(<vscale x 2 x i64
; ZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; ZIP-NEXT: addi sp, sp, 80
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv10i64_nxv2i64:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -80
+; ZVZIP-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: addi s0, sp, 80
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a1, 28
+; ZVZIP-NEXT: mul a0, a0, a1
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: andi sp, sp, -64
+; ZVZIP-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; ZVZIP-NEXT: vmv2r.v v20, v16
+; ZVZIP-NEXT: addi a0, sp, 64
+; ZVZIP-NEXT: vmv2r.v v18, v12
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: slli a2, a1, 2
+; ZVZIP-NEXT: add a1, a2, a1
+; ZVZIP-NEXT: add a1, sp, a1
+; ZVZIP-NEXT: addi a1, a1, 64
+; ZVZIP-NEXT: csrr a2, vlenb
+; ZVZIP-NEXT: vmv2r.v v16, v8
+; ZVZIP-NEXT: vmv2r.v v22, v16
+; ZVZIP-NEXT: vmv2r.v v24, v18
+; ZVZIP-NEXT: vmv1r.v v26, v20
+; ZVZIP-NEXT: add a3, a0, a2
+; ZVZIP-NEXT: vmv1r.v v23, v10
+; ZVZIP-NEXT: add a4, a1, a2
+; ZVZIP-NEXT: add a5, a4, a2
+; ZVZIP-NEXT: vmv1r.v v25, v14
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vmv1r.v v18, v11
+; ZVZIP-NEXT: vsseg5e64.v v22, (a0)
+; ZVZIP-NEXT: vmv1r.v v20, v15
+; ZVZIP-NEXT: vsseg5e64.v v17, (a1)
+; ZVZIP-NEXT: vl1re64.v v16, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re64.v v17, (a6)
+; ZVZIP-NEXT: add a6, a3, a2
+; ZVZIP-NEXT: vl1re64.v v10, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re64.v v11, (a6)
+; ZVZIP-NEXT: vl1re64.v v8, (a0)
+; ZVZIP-NEXT: vl1re64.v v9, (a3)
+; ZVZIP-NEXT: vl1re64.v v14, (a4)
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a3, 10
+; ZVZIP-NEXT: mul a0, a0, a3
+; ZVZIP-NEXT: add a0, sp, a0
+; ZVZIP-NEXT: addi a0, a0, 64
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re64.v v15, (a5)
+; ZVZIP-NEXT: vl1re64.v v12, (a6)
+; ZVZIP-NEXT: vl1re64.v v13, (a1)
+; ZVZIP-NEXT: slli a2, a2, 3
+; ZVZIP-NEXT: add a2, a0, a2
+; ZVZIP-NEXT: vs2r.v v16, (a2)
+; ZVZIP-NEXT: vs8r.v v8, (a0)
+; ZVZIP-NEXT: vl8re64.v v16, (a2)
+; ZVZIP-NEXT: vl8re64.v v8, (a0)
+; ZVZIP-NEXT: addi sp, s0, -80
+; ZVZIP-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: addi sp, sp, 80
+; ZVZIP-NEXT: ret
%res = call <vscale x 10 x i64> @llvm.vector.interleave5.nxv10i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e)
ret <vscale x 10 x i64> %res
}
@@ -2841,6 +3144,79 @@ define <vscale x 96 x i8> @vector_interleave_nxv96i8_nxv16i8(<vscale x 16 x i8>
; ZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; ZIP-NEXT: addi sp, sp, 80
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv96i8_nxv16i8:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -80
+; ZVZIP-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: addi s0, sp, 80
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a1, 28
+; ZVZIP-NEXT: mul a0, a0, a1
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: andi sp, sp, -64
+; ZVZIP-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; ZVZIP-NEXT: vmv2r.v v20, v14
+; ZVZIP-NEXT: vmv2r.v v22, v12
+; ZVZIP-NEXT: vmv2r.v v24, v10
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: li a0, 6
+; ZVZIP-NEXT: mul a1, a1, a0
+; ZVZIP-NEXT: add a1, sp, a1
+; ZVZIP-NEXT: addi a1, a1, 64
+; ZVZIP-NEXT: vmv1r.v v10, v25
+; ZVZIP-NEXT: vmv1r.v v11, v23
+; ZVZIP-NEXT: vmv1r.v v12, v21
+; ZVZIP-NEXT: addi a0, sp, 64
+; ZVZIP-NEXT: vmv1r.v v13, v17
+; ZVZIP-NEXT: csrr a2, vlenb
+; ZVZIP-NEXT: vmv1r.v v14, v19
+; ZVZIP-NEXT: vsseg6e8.v v9, (a1)
+; ZVZIP-NEXT: vmv1r.v v9, v24
+; ZVZIP-NEXT: add a5, a1, a2
+; ZVZIP-NEXT: vmv1r.v v10, v22
+; ZVZIP-NEXT: add a3, a0, a2
+; ZVZIP-NEXT: vmv1r.v v11, v20
+; ZVZIP-NEXT: add a4, a3, a2
+; ZVZIP-NEXT: vmv1r.v v12, v16
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vmv1r.v v13, v18
+; ZVZIP-NEXT: vsseg6e8.v v8, (a0)
+; ZVZIP-NEXT: vl1r.v v14, (a1)
+; ZVZIP-NEXT: add a1, a6, a2
+; ZVZIP-NEXT: vl1r.v v15, (a5)
+; ZVZIP-NEXT: add a5, a1, a2
+; ZVZIP-NEXT: vl1r.v v18, (a5)
+; ZVZIP-NEXT: add a5, a5, a2
+; ZVZIP-NEXT: vl1r.v v19, (a5)
+; ZVZIP-NEXT: add a5, a4, a2
+; ZVZIP-NEXT: vl1r.v v16, (a6)
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vl1r.v v12, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1r.v v13, (a6)
+; ZVZIP-NEXT: csrr a6, vlenb
+; ZVZIP-NEXT: li a7, 12
+; ZVZIP-NEXT: mul a6, a6, a7
+; ZVZIP-NEXT: add a6, sp, a6
+; ZVZIP-NEXT: addi a6, a6, 64
+; ZVZIP-NEXT: vl1r.v v17, (a1)
+; ZVZIP-NEXT: vl1r.v v10, (a4)
+; ZVZIP-NEXT: vl1r.v v11, (a5)
+; ZVZIP-NEXT: vl1r.v v8, (a0)
+; ZVZIP-NEXT: vl1r.v v9, (a3)
+; ZVZIP-NEXT: slli a2, a2, 3
+; ZVZIP-NEXT: add a2, a6, a2
+; ZVZIP-NEXT: vs4r.v v16, (a2)
+; ZVZIP-NEXT: vs8r.v v8, (a6)
+; ZVZIP-NEXT: vl8r.v v16, (a2)
+; ZVZIP-NEXT: vl8r.v v8, (a6)
+; ZVZIP-NEXT: addi sp, s0, -80
+; ZVZIP-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: addi sp, sp, 80
+; ZVZIP-NEXT: ret
%res = call <vscale x 96 x i8> @llvm.vector.interleave6.nxv96i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f)
ret <vscale x 96 x i8> %res
}
@@ -3273,6 +3649,79 @@ define <vscale x 24 x i32> @vector_interleave_nxv24i32_nxv4i32(<vscale x 4 x i32
; ZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; ZIP-NEXT: addi sp, sp, 80
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv24i32_nxv4i32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -80
+; ZVZIP-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: addi s0, sp, 80
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a1, 28
+; ZVZIP-NEXT: mul a0, a0, a1
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: andi sp, sp, -64
+; ZVZIP-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; ZVZIP-NEXT: vmv2r.v v20, v14
+; ZVZIP-NEXT: vmv2r.v v22, v12
+; ZVZIP-NEXT: vmv2r.v v24, v10
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: li a0, 6
+; ZVZIP-NEXT: mul a1, a1, a0
+; ZVZIP-NEXT: add a1, sp, a1
+; ZVZIP-NEXT: addi a1, a1, 64
+; ZVZIP-NEXT: vmv1r.v v10, v25
+; ZVZIP-NEXT: vmv1r.v v11, v23
+; ZVZIP-NEXT: vmv1r.v v12, v21
+; ZVZIP-NEXT: addi a0, sp, 64
+; ZVZIP-NEXT: vmv1r.v v13, v17
+; ZVZIP-NEXT: csrr a2, vlenb
+; ZVZIP-NEXT: vmv1r.v v14, v19
+; ZVZIP-NEXT: vsseg6e32.v v9, (a1)
+; ZVZIP-NEXT: vmv1r.v v9, v24
+; ZVZIP-NEXT: add a5, a1, a2
+; ZVZIP-NEXT: vmv1r.v v10, v22
+; ZVZIP-NEXT: add a3, a0, a2
+; ZVZIP-NEXT: vmv1r.v v11, v20
+; ZVZIP-NEXT: add a4, a3, a2
+; ZVZIP-NEXT: vmv1r.v v12, v16
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vmv1r.v v13, v18
+; ZVZIP-NEXT: vsseg6e32.v v8, (a0)
+; ZVZIP-NEXT: vl1re32.v v14, (a1)
+; ZVZIP-NEXT: add a1, a6, a2
+; ZVZIP-NEXT: vl1re32.v v15, (a5)
+; ZVZIP-NEXT: add a5, a1, a2
+; ZVZIP-NEXT: vl1re32.v v18, (a5)
+; ZVZIP-NEXT: add a5, a5, a2
+; ZVZIP-NEXT: vl1re32.v v19, (a5)
+; ZVZIP-NEXT: add a5, a4, a2
+; ZVZIP-NEXT: vl1re32.v v16, (a6)
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vl1re32.v v12, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re32.v v13, (a6)
+; ZVZIP-NEXT: csrr a6, vlenb
+; ZVZIP-NEXT: li a7, 12
+; ZVZIP-NEXT: mul a6, a6, a7
+; ZVZIP-NEXT: add a6, sp, a6
+; ZVZIP-NEXT: addi a6, a6, 64
+; ZVZIP-NEXT: vl1re32.v v17, (a1)
+; ZVZIP-NEXT: vl1re32.v v10, (a4)
+; ZVZIP-NEXT: vl1re32.v v11, (a5)
+; ZVZIP-NEXT: vl1re32.v v8, (a0)
+; ZVZIP-NEXT: vl1re32.v v9, (a3)
+; ZVZIP-NEXT: slli a2, a2, 3
+; ZVZIP-NEXT: add a2, a6, a2
+; ZVZIP-NEXT: vs4r.v v16, (a2)
+; ZVZIP-NEXT: vs8r.v v8, (a6)
+; ZVZIP-NEXT: vl8re32.v v16, (a2)
+; ZVZIP-NEXT: vl8re32.v v8, (a6)
+; ZVZIP-NEXT: addi sp, s0, -80
+; ZVZIP-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: addi sp, sp, 80
+; ZVZIP-NEXT: ret
%res = call <vscale x 24 x i32> @llvm.vector.interleave6.nxv24i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f)
ret <vscale x 24 x i32> %res
}
@@ -3643,6 +4092,79 @@ define <vscale x 12 x i64> @vector_interleave_nxv12i64_nxv2i64(<vscale x 2 x i64
; ZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; ZIP-NEXT: addi sp, sp, 80
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv12i64_nxv2i64:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -80
+; ZVZIP-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: addi s0, sp, 80
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a1, 28
+; ZVZIP-NEXT: mul a0, a0, a1
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: andi sp, sp, -64
+; ZVZIP-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; ZVZIP-NEXT: vmv2r.v v20, v14
+; ZVZIP-NEXT: vmv2r.v v22, v12
+; ZVZIP-NEXT: vmv2r.v v24, v10
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: li a0, 6
+; ZVZIP-NEXT: mul a1, a1, a0
+; ZVZIP-NEXT: add a1, sp, a1
+; ZVZIP-NEXT: addi a1, a1, 64
+; ZVZIP-NEXT: vmv1r.v v10, v25
+; ZVZIP-NEXT: vmv1r.v v11, v23
+; ZVZIP-NEXT: vmv1r.v v12, v21
+; ZVZIP-NEXT: addi a0, sp, 64
+; ZVZIP-NEXT: vmv1r.v v13, v17
+; ZVZIP-NEXT: csrr a2, vlenb
+; ZVZIP-NEXT: vmv1r.v v14, v19
+; ZVZIP-NEXT: vsseg6e64.v v9, (a1)
+; ZVZIP-NEXT: vmv1r.v v9, v24
+; ZVZIP-NEXT: add a5, a1, a2
+; ZVZIP-NEXT: vmv1r.v v10, v22
+; ZVZIP-NEXT: add a3, a0, a2
+; ZVZIP-NEXT: vmv1r.v v11, v20
+; ZVZIP-NEXT: add a4, a3, a2
+; ZVZIP-NEXT: vmv1r.v v12, v16
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vmv1r.v v13, v18
+; ZVZIP-NEXT: vsseg6e64.v v8, (a0)
+; ZVZIP-NEXT: vl1re64.v v14, (a1)
+; ZVZIP-NEXT: add a1, a6, a2
+; ZVZIP-NEXT: vl1re64.v v15, (a5)
+; ZVZIP-NEXT: add a5, a1, a2
+; ZVZIP-NEXT: vl1re64.v v18, (a5)
+; ZVZIP-NEXT: add a5, a5, a2
+; ZVZIP-NEXT: vl1re64.v v19, (a5)
+; ZVZIP-NEXT: add a5, a4, a2
+; ZVZIP-NEXT: vl1re64.v v16, (a6)
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vl1re64.v v12, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re64.v v13, (a6)
+; ZVZIP-NEXT: csrr a6, vlenb
+; ZVZIP-NEXT: li a7, 12
+; ZVZIP-NEXT: mul a6, a6, a7
+; ZVZIP-NEXT: add a6, sp, a6
+; ZVZIP-NEXT: addi a6, a6, 64
+; ZVZIP-NEXT: vl1re64.v v17, (a1)
+; ZVZIP-NEXT: vl1re64.v v10, (a4)
+; ZVZIP-NEXT: vl1re64.v v11, (a5)
+; ZVZIP-NEXT: vl1re64.v v8, (a0)
+; ZVZIP-NEXT: vl1re64.v v9, (a3)
+; ZVZIP-NEXT: slli a2, a2, 3
+; ZVZIP-NEXT: add a2, a6, a2
+; ZVZIP-NEXT: vs4r.v v16, (a2)
+; ZVZIP-NEXT: vs8r.v v8, (a6)
+; ZVZIP-NEXT: vl8re64.v v16, (a2)
+; ZVZIP-NEXT: vl8re64.v v8, (a6)
+; ZVZIP-NEXT: addi sp, s0, -80
+; ZVZIP-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: addi sp, sp, 80
+; ZVZIP-NEXT: ret
%res = call <vscale x 12 x i64> @llvm.vector.interleave6.nxv12i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f)
ret <vscale x 12 x i64> %res
}
@@ -4242,6 +4764,87 @@ define <vscale x 112 x i8> @vector_interleave_nxv112i8_nxv16i8(<vscale x 16 x i8
; ZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; ZIP-NEXT: addi sp, sp, 80
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv112i8_nxv16i8:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -80
+; ZVZIP-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: addi s0, sp, 80
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 5
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: andi sp, sp, -64
+; ZVZIP-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; ZVZIP-NEXT: vmv2r.v v26, v20
+; ZVZIP-NEXT: addi a0, sp, 64
+; ZVZIP-NEXT: vmv2r.v v24, v16
+; ZVZIP-NEXT: vmv2r.v v22, v12
+; ZVZIP-NEXT: vmv2r.v v20, v8
+; ZVZIP-NEXT: vmv1r.v v1, v20
+; ZVZIP-NEXT: vmv1r.v v3, v22
+; ZVZIP-NEXT: vmv1r.v v5, v24
+; ZVZIP-NEXT: vmv1r.v v7, v26
+; ZVZIP-NEXT: vmv1r.v v2, v10
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: slli a2, a1, 3
+; ZVZIP-NEXT: sub a1, a2, a1
+; ZVZIP-NEXT: add a1, sp, a1
+; ZVZIP-NEXT: addi a1, a1, 64
+; ZVZIP-NEXT: csrr a2, vlenb
+; ZVZIP-NEXT: vmv1r.v v4, v14
+; ZVZIP-NEXT: add a3, a0, a2
+; ZVZIP-NEXT: add a4, a1, a2
+; ZVZIP-NEXT: vmv1r.v v6, v18
+; ZVZIP-NEXT: add a5, a4, a2
+; ZVZIP-NEXT: vmv1r.v v22, v11
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vmv1r.v v24, v15
+; ZVZIP-NEXT: vsseg7e8.v v1, (a0)
+; ZVZIP-NEXT: vmv1r.v v26, v19
+; ZVZIP-NEXT: vsseg7e8.v v21, (a1)
+; ZVZIP-NEXT: vl1r.v v18, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1r.v v19, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1r.v v20, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1r.v v21, (a6)
+; ZVZIP-NEXT: add a6, a3, a2
+; ZVZIP-NEXT: vl1r.v v10, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1r.v v11, (a6)
+; ZVZIP-NEXT: vl1r.v v8, (a0)
+; ZVZIP-NEXT: vl1r.v v16, (a4)
+; ZVZIP-NEXT: vl1r.v v9, (a3)
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a3, 14
+; ZVZIP-NEXT: mul a0, a0, a3
+; ZVZIP-NEXT: add a0, sp, a0
+; ZVZIP-NEXT: addi a0, a0, 64
+; ZVZIP-NEXT: vl1r.v v17, (a5)
+; ZVZIP-NEXT: slli a3, a2, 2
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1r.v v12, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1r.v v13, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: slli a2, a2, 3
+; ZVZIP-NEXT: add a3, a2, a3
+; ZVZIP-NEXT: add a2, a0, a2
+; ZVZIP-NEXT: vl1r.v v14, (a6)
+; ZVZIP-NEXT: vl1r.v v15, (a1)
+; ZVZIP-NEXT: add a3, a0, a3
+; ZVZIP-NEXT: vs2r.v v20, (a3)
+; ZVZIP-NEXT: vs4r.v v16, (a2)
+; ZVZIP-NEXT: vs8r.v v8, (a0)
+; ZVZIP-NEXT: vl8r.v v16, (a2)
+; ZVZIP-NEXT: vl8r.v v8, (a0)
+; ZVZIP-NEXT: addi sp, s0, -80
+; ZVZIP-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: addi sp, sp, 80
+; ZVZIP-NEXT: ret
%res = call <vscale x 112 x i8> @llvm.vector.interleave7.nxv112i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f, <vscale x 16 x i8> %g)
ret <vscale x 112 x i8> %res
}
@@ -4653,6 +5256,87 @@ define <vscale x 56 x i16> @vector_interleave_nxv56i16_nxv8i16(<vscale x 8 x i16
; ZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; ZIP-NEXT: addi sp, sp, 80
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv56i16_nxv8i16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -80
+; ZVZIP-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: addi s0, sp, 80
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 5
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: andi sp, sp, -64
+; ZVZIP-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVZIP-NEXT: vmv2r.v v26, v20
+; ZVZIP-NEXT: addi a0, sp, 64
+; ZVZIP-NEXT: vmv2r.v v24, v16
+; ZVZIP-NEXT: vmv2r.v v22, v12
+; ZVZIP-NEXT: vmv2r.v v20, v8
+; ZVZIP-NEXT: vmv1r.v v1, v20
+; ZVZIP-NEXT: vmv1r.v v3, v22
+; ZVZIP-NEXT: vmv1r.v v5, v24
+; ZVZIP-NEXT: vmv1r.v v7, v26
+; ZVZIP-NEXT: vmv1r.v v2, v10
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: slli a2, a1, 3
+; ZVZIP-NEXT: sub a1, a2, a1
+; ZVZIP-NEXT: add a1, sp, a1
+; ZVZIP-NEXT: addi a1, a1, 64
+; ZVZIP-NEXT: csrr a2, vlenb
+; ZVZIP-NEXT: vmv1r.v v4, v14
+; ZVZIP-NEXT: add a3, a0, a2
+; ZVZIP-NEXT: add a4, a1, a2
+; ZVZIP-NEXT: vmv1r.v v6, v18
+; ZVZIP-NEXT: add a5, a4, a2
+; ZVZIP-NEXT: vmv1r.v v22, v11
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vmv1r.v v24, v15
+; ZVZIP-NEXT: vsseg7e16.v v1, (a0)
+; ZVZIP-NEXT: vmv1r.v v26, v19
+; ZVZIP-NEXT: vsseg7e16.v v21, (a1)
+; ZVZIP-NEXT: vl1re16.v v18, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re16.v v19, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re16.v v20, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re16.v v21, (a6)
+; ZVZIP-NEXT: add a6, a3, a2
+; ZVZIP-NEXT: vl1re16.v v10, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re16.v v11, (a6)
+; ZVZIP-NEXT: vl1re16.v v8, (a0)
+; ZVZIP-NEXT: vl1re16.v v16, (a4)
+; ZVZIP-NEXT: vl1re16.v v9, (a3)
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a3, 14
+; ZVZIP-NEXT: mul a0, a0, a3
+; ZVZIP-NEXT: add a0, sp, a0
+; ZVZIP-NEXT: addi a0, a0, 64
+; ZVZIP-NEXT: vl1re16.v v17, (a5)
+; ZVZIP-NEXT: slli a3, a2, 2
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re16.v v12, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re16.v v13, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: slli a2, a2, 3
+; ZVZIP-NEXT: add a3, a2, a3
+; ZVZIP-NEXT: add a2, a0, a2
+; ZVZIP-NEXT: vl1re16.v v14, (a6)
+; ZVZIP-NEXT: vl1re16.v v15, (a1)
+; ZVZIP-NEXT: add a3, a0, a3
+; ZVZIP-NEXT: vs2r.v v20, (a3)
+; ZVZIP-NEXT: vs4r.v v16, (a2)
+; ZVZIP-NEXT: vs8r.v v8, (a0)
+; ZVZIP-NEXT: vl8re16.v v16, (a2)
+; ZVZIP-NEXT: vl8re16.v v8, (a0)
+; ZVZIP-NEXT: addi sp, s0, -80
+; ZVZIP-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: addi sp, sp, 80
+; ZVZIP-NEXT: ret
%res = call <vscale x 56 x i16> @llvm.vector.interleave7.nxv56i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f, <vscale x 8 x i16> %g)
ret <vscale x 56 x i16> %res
}
@@ -5064,6 +5748,87 @@ define <vscale x 28 x i32> @vector_interleave_nxv28i32_nxv4i32(<vscale x 4 x i32
; ZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; ZIP-NEXT: addi sp, sp, 80
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv28i32_nxv4i32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -80
+; ZVZIP-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: addi s0, sp, 80
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 5
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: andi sp, sp, -64
+; ZVZIP-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; ZVZIP-NEXT: vmv2r.v v26, v20
+; ZVZIP-NEXT: addi a0, sp, 64
+; ZVZIP-NEXT: vmv2r.v v24, v16
+; ZVZIP-NEXT: vmv2r.v v22, v12
+; ZVZIP-NEXT: vmv2r.v v20, v8
+; ZVZIP-NEXT: vmv1r.v v1, v20
+; ZVZIP-NEXT: vmv1r.v v3, v22
+; ZVZIP-NEXT: vmv1r.v v5, v24
+; ZVZIP-NEXT: vmv1r.v v7, v26
+; ZVZIP-NEXT: vmv1r.v v2, v10
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: slli a2, a1, 3
+; ZVZIP-NEXT: sub a1, a2, a1
+; ZVZIP-NEXT: add a1, sp, a1
+; ZVZIP-NEXT: addi a1, a1, 64
+; ZVZIP-NEXT: csrr a2, vlenb
+; ZVZIP-NEXT: vmv1r.v v4, v14
+; ZVZIP-NEXT: add a3, a0, a2
+; ZVZIP-NEXT: add a4, a1, a2
+; ZVZIP-NEXT: vmv1r.v v6, v18
+; ZVZIP-NEXT: add a5, a4, a2
+; ZVZIP-NEXT: vmv1r.v v22, v11
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vmv1r.v v24, v15
+; ZVZIP-NEXT: vsseg7e32.v v1, (a0)
+; ZVZIP-NEXT: vmv1r.v v26, v19
+; ZVZIP-NEXT: vsseg7e32.v v21, (a1)
+; ZVZIP-NEXT: vl1re32.v v18, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re32.v v19, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re32.v v20, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re32.v v21, (a6)
+; ZVZIP-NEXT: add a6, a3, a2
+; ZVZIP-NEXT: vl1re32.v v10, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re32.v v11, (a6)
+; ZVZIP-NEXT: vl1re32.v v8, (a0)
+; ZVZIP-NEXT: vl1re32.v v16, (a4)
+; ZVZIP-NEXT: vl1re32.v v9, (a3)
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a3, 14
+; ZVZIP-NEXT: mul a0, a0, a3
+; ZVZIP-NEXT: add a0, sp, a0
+; ZVZIP-NEXT: addi a0, a0, 64
+; ZVZIP-NEXT: vl1re32.v v17, (a5)
+; ZVZIP-NEXT: slli a3, a2, 2
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re32.v v12, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re32.v v13, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: slli a2, a2, 3
+; ZVZIP-NEXT: add a3, a2, a3
+; ZVZIP-NEXT: add a2, a0, a2
+; ZVZIP-NEXT: vl1re32.v v14, (a6)
+; ZVZIP-NEXT: vl1re32.v v15, (a1)
+; ZVZIP-NEXT: add a3, a0, a3
+; ZVZIP-NEXT: vs2r.v v20, (a3)
+; ZVZIP-NEXT: vs4r.v v16, (a2)
+; ZVZIP-NEXT: vs8r.v v8, (a0)
+; ZVZIP-NEXT: vl8re32.v v16, (a2)
+; ZVZIP-NEXT: vl8re32.v v8, (a0)
+; ZVZIP-NEXT: addi sp, s0, -80
+; ZVZIP-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: addi sp, sp, 80
+; ZVZIP-NEXT: ret
%res = call <vscale x 28 x i32> @llvm.vector.interleave7.nxv28i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f, <vscale x 4 x i32> %g)
ret <vscale x 28 x i32> %res
}
@@ -5474,6 +6239,87 @@ define <vscale x 14 x i64> @vector_interleave_nxv14i64_nxv2i64(<vscale x 2 x i64
; ZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; ZIP-NEXT: addi sp, sp, 80
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv14i64_nxv2i64:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -80
+; ZVZIP-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: addi s0, sp, 80
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 5
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: andi sp, sp, -64
+; ZVZIP-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; ZVZIP-NEXT: vmv2r.v v26, v20
+; ZVZIP-NEXT: addi a0, sp, 64
+; ZVZIP-NEXT: vmv2r.v v24, v16
+; ZVZIP-NEXT: vmv2r.v v22, v12
+; ZVZIP-NEXT: vmv2r.v v20, v8
+; ZVZIP-NEXT: vmv1r.v v1, v20
+; ZVZIP-NEXT: vmv1r.v v3, v22
+; ZVZIP-NEXT: vmv1r.v v5, v24
+; ZVZIP-NEXT: vmv1r.v v7, v26
+; ZVZIP-NEXT: vmv1r.v v2, v10
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: slli a2, a1, 3
+; ZVZIP-NEXT: sub a1, a2, a1
+; ZVZIP-NEXT: add a1, sp, a1
+; ZVZIP-NEXT: addi a1, a1, 64
+; ZVZIP-NEXT: csrr a2, vlenb
+; ZVZIP-NEXT: vmv1r.v v4, v14
+; ZVZIP-NEXT: add a3, a0, a2
+; ZVZIP-NEXT: add a4, a1, a2
+; ZVZIP-NEXT: vmv1r.v v6, v18
+; ZVZIP-NEXT: add a5, a4, a2
+; ZVZIP-NEXT: vmv1r.v v22, v11
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vmv1r.v v24, v15
+; ZVZIP-NEXT: vsseg7e64.v v1, (a0)
+; ZVZIP-NEXT: vmv1r.v v26, v19
+; ZVZIP-NEXT: vsseg7e64.v v21, (a1)
+; ZVZIP-NEXT: vl1re64.v v18, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re64.v v19, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re64.v v20, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re64.v v21, (a6)
+; ZVZIP-NEXT: add a6, a3, a2
+; ZVZIP-NEXT: vl1re64.v v10, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re64.v v11, (a6)
+; ZVZIP-NEXT: vl1re64.v v8, (a0)
+; ZVZIP-NEXT: vl1re64.v v16, (a4)
+; ZVZIP-NEXT: vl1re64.v v9, (a3)
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a3, 14
+; ZVZIP-NEXT: mul a0, a0, a3
+; ZVZIP-NEXT: add a0, sp, a0
+; ZVZIP-NEXT: addi a0, a0, 64
+; ZVZIP-NEXT: vl1re64.v v17, (a5)
+; ZVZIP-NEXT: slli a3, a2, 2
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re64.v v12, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re64.v v13, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: slli a2, a2, 3
+; ZVZIP-NEXT: add a3, a2, a3
+; ZVZIP-NEXT: add a2, a0, a2
+; ZVZIP-NEXT: vl1re64.v v14, (a6)
+; ZVZIP-NEXT: vl1re64.v v15, (a1)
+; ZVZIP-NEXT: add a3, a0, a3
+; ZVZIP-NEXT: vs2r.v v20, (a3)
+; ZVZIP-NEXT: vs4r.v v16, (a2)
+; ZVZIP-NEXT: vs8r.v v8, (a0)
+; ZVZIP-NEXT: vl8re64.v v16, (a2)
+; ZVZIP-NEXT: vl8re64.v v8, (a0)
+; ZVZIP-NEXT: addi sp, s0, -80
+; ZVZIP-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: addi sp, sp, 80
+; ZVZIP-NEXT: ret
%res = call <vscale x 14 x i64> @llvm.vector.interleave7.nxv14i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f, <vscale x 2 x i64> %g)
ret <vscale x 14 x i64> %res
}
@@ -6280,6 +7126,18 @@ define <vscale x 4 x bfloat> @vector_interleave_nxv4bf16_nxv2bf16(<vscale x 2 x
; ZIP-NEXT: vslideup.vx v10, v11, a0
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv4bf16_nxv2bf16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVZIP-NEXT: vzip.vv v10, v8, v9
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: srli a0, a0, 2
+; ZVZIP-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVZIP-NEXT: vslidedown.vx v8, v10, a0
+; ZVZIP-NEXT: vslideup.vx v10, v8, a0
+; ZVZIP-NEXT: vmv.v.v v8, v10
+; ZVZIP-NEXT: ret
%res = call <vscale x 4 x bfloat> @llvm.vector.interleave2.nxv4bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
ret <vscale x 4 x bfloat> %res
}
@@ -6312,6 +7170,14 @@ define <vscale x 8 x bfloat> @vector_interleave_nxv8bf16_nxv4bf16(<vscale x 4 x
; ZIP-NEXT: ri.vzip2b.vv v9, v8, v10
; ZIP-NEXT: ri.vzip2a.vv v8, v11, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv8bf16_nxv4bf16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVZIP-NEXT: vmv1r.v v10, v9
+; ZVZIP-NEXT: vmv1r.v v11, v8
+; ZVZIP-NEXT: vzip.vv v8, v11, v10
+; ZVZIP-NEXT: ret
%res = call <vscale x 8 x bfloat> @llvm.vector.interleave2.nxv8bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
ret <vscale x 8 x bfloat> %res
}
@@ -6355,6 +7221,18 @@ define <vscale x 4 x half> @vector_interleave_nxv4f16_nxv2f16(<vscale x 2 x half
; ZIP-NEXT: vslideup.vx v10, v11, a0
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv4f16_nxv2f16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVZIP-NEXT: vzip.vv v10, v8, v9
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: srli a0, a0, 2
+; ZVZIP-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVZIP-NEXT: vslidedown.vx v8, v10, a0
+; ZVZIP-NEXT: vslideup.vx v10, v8, a0
+; ZVZIP-NEXT: vmv.v.v v8, v10
+; ZVZIP-NEXT: ret
%res = call <vscale x 4 x half> @llvm.vector.interleave2.nxv4f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
ret <vscale x 4 x half> %res
}
@@ -6387,6 +7265,14 @@ define <vscale x 8 x half> @vector_interleave_nxv8f16_nxv4f16(<vscale x 4 x half
; ZIP-NEXT: ri.vzip2b.vv v9, v8, v10
; ZIP-NEXT: ri.vzip2a.vv v8, v11, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv8f16_nxv4f16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVZIP-NEXT: vmv1r.v v10, v9
+; ZVZIP-NEXT: vmv1r.v v11, v8
+; ZVZIP-NEXT: vzip.vv v8, v11, v10
+; ZVZIP-NEXT: ret
%res = call <vscale x 8 x half> @llvm.vector.interleave2.nxv8f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
ret <vscale x 8 x half> %res
}
@@ -6420,6 +7306,14 @@ define <vscale x 4 x float> @vector_interleave_nxv4f32_nxv2f32(<vscale x 2 x flo
; ZIP-NEXT: ri.vzip2b.vv v9, v8, v10
; ZIP-NEXT: ri.vzip2a.vv v8, v11, v10
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv4f32_nxv2f32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; ZVZIP-NEXT: vmv1r.v v10, v9
+; ZVZIP-NEXT: vmv1r.v v11, v8
+; ZVZIP-NEXT: vzip.vv v8, v11, v10
+; ZVZIP-NEXT: ret
%res = call <vscale x 4 x float> @llvm.vector.interleave2.nxv4f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b)
ret <vscale x 4 x float> %res
}
@@ -6452,6 +7346,14 @@ define <vscale x 16 x bfloat> @vector_interleave_nxv16bf16_nxv8bf16(<vscale x 8
; ZIP-NEXT: ri.vzip2b.vv v10, v8, v12
; ZIP-NEXT: ri.vzip2a.vv v8, v14, v12
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv16bf16_nxv8bf16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVZIP-NEXT: vmv2r.v v12, v10
+; ZVZIP-NEXT: vmv2r.v v14, v8
+; ZVZIP-NEXT: vzip.vv v8, v14, v12
+; ZVZIP-NEXT: ret
%res = call <vscale x 16 x bfloat> @llvm.vector.interleave2.nxv16bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
ret <vscale x 16 x bfloat> %res
}
@@ -6484,6 +7386,14 @@ define <vscale x 16 x half> @vector_interleave_nxv16f16_nxv8f16(<vscale x 8 x ha
; ZIP-NEXT: ri.vzip2b.vv v10, v8, v12
; ZIP-NEXT: ri.vzip2a.vv v8, v14, v12
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv16f16_nxv8f16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVZIP-NEXT: vmv2r.v v12, v10
+; ZVZIP-NEXT: vmv2r.v v14, v8
+; ZVZIP-NEXT: vzip.vv v8, v14, v12
+; ZVZIP-NEXT: ret
%res = call <vscale x 16 x half> @llvm.vector.interleave2.nxv16f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
ret <vscale x 16 x half> %res
}
@@ -6517,6 +7427,14 @@ define <vscale x 8 x float> @vector_interleave_nxv8f32_nxv4f32(<vscale x 4 x flo
; ZIP-NEXT: ri.vzip2b.vv v10, v8, v12
; ZIP-NEXT: ri.vzip2a.vv v8, v14, v12
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv8f32_nxv4f32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; ZVZIP-NEXT: vmv2r.v v12, v10
+; ZVZIP-NEXT: vmv2r.v v14, v8
+; ZVZIP-NEXT: vzip.vv v8, v14, v12
+; ZVZIP-NEXT: ret
%res = call <vscale x 8 x float> @llvm.vector.interleave2.nxv8f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
ret <vscale x 8 x float> %res
}
@@ -6560,6 +7478,14 @@ define <vscale x 4 x double> @vector_interleave_nxv4f64_nxv2f64(<vscale x 2 x do
; ZIP-NEXT: ri.vzip2b.vv v10, v8, v12
; ZIP-NEXT: ri.vzip2a.vv v8, v14, v12
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv4f64_nxv2f64:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; ZVZIP-NEXT: vmv2r.v v12, v10
+; ZVZIP-NEXT: vmv2r.v v14, v8
+; ZVZIP-NEXT: vzip.vv v8, v14, v12
+; ZVZIP-NEXT: ret
%res = call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
ret <vscale x 4 x double> %res
}
@@ -6600,6 +7526,15 @@ define <vscale x 64 x bfloat> @vector_interleave_nxv64bf16_nxv32bf16(<vscale x 3
; ZIP-NEXT: vmv8r.v v8, v24
; ZIP-NEXT: vmv8r.v v16, v0
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv64bf16_nxv32bf16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVZIP-NEXT: vzip.vv v24, v8, v16
+; ZVZIP-NEXT: vzip.vv v0, v12, v20
+; ZVZIP-NEXT: vmv8r.v v8, v24
+; ZVZIP-NEXT: vmv8r.v v16, v0
+; ZVZIP-NEXT: ret
%res = call <vscale x 64 x bfloat> @llvm.vector.interleave2.nxv64bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
ret <vscale x 64 x bfloat> %res
}
@@ -6638,6 +7573,15 @@ define <vscale x 64 x half> @vector_interleave_nxv64f16_nxv32f16(<vscale x 32 x
; ZIP-NEXT: vmv8r.v v8, v24
; ZIP-NEXT: vmv8r.v v16, v0
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv64f16_nxv32f16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVZIP-NEXT: vzip.vv v24, v8, v16
+; ZVZIP-NEXT: vzip.vv v0, v12, v20
+; ZVZIP-NEXT: vmv8r.v v8, v24
+; ZVZIP-NEXT: vmv8r.v v16, v0
+; ZVZIP-NEXT: ret
%res = call <vscale x 64 x half> @llvm.vector.interleave2.nxv64f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b)
ret <vscale x 64 x half> %res
}
@@ -6677,6 +7621,15 @@ define <vscale x 32 x float> @vector_interleave_nxv32f32_nxv16f32(<vscale x 16 x
; ZIP-NEXT: vmv8r.v v8, v24
; ZIP-NEXT: vmv8r.v v16, v0
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv32f32_nxv16f32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; ZVZIP-NEXT: vzip.vv v24, v8, v16
+; ZVZIP-NEXT: vzip.vv v0, v12, v20
+; ZVZIP-NEXT: vmv8r.v v8, v24
+; ZVZIP-NEXT: vmv8r.v v16, v0
+; ZVZIP-NEXT: ret
%res = call <vscale x 32 x float> @llvm.vector.interleave2.nxv32f32(<vscale x 16 x float> %a, <vscale x 16 x float> %b)
ret <vscale x 32 x float> %res
}
@@ -6730,6 +7683,15 @@ define <vscale x 16 x double> @vector_interleave_nxv16f64_nxv8f64(<vscale x 8 x
; ZIP-NEXT: vmv8r.v v8, v24
; ZIP-NEXT: vmv8r.v v16, v0
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv16f64_nxv8f64:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; ZVZIP-NEXT: vzip.vv v24, v8, v16
+; ZVZIP-NEXT: vzip.vv v0, v12, v20
+; ZVZIP-NEXT: vmv8r.v v8, v24
+; ZVZIP-NEXT: vmv8r.v v16, v0
+; ZVZIP-NEXT: ret
%res = call <vscale x 16 x double> @llvm.vector.interleave2.nxv16f64(<vscale x 8 x double> %a, <vscale x 8 x double> %b)
ret <vscale x 16 x double> %res
}
@@ -8372,6 +9334,72 @@ define <vscale x 40 x half> @vector_interleave_nxv40f16_nxv8f16(<vscale x 8 x ha
; ZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; ZIP-NEXT: addi sp, sp, 80
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv40f16_nxv8f16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -80
+; ZVZIP-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: addi s0, sp, 80
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a1, 28
+; ZVZIP-NEXT: mul a0, a0, a1
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: andi sp, sp, -64
+; ZVZIP-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVZIP-NEXT: vmv2r.v v20, v16
+; ZVZIP-NEXT: addi a0, sp, 64
+; ZVZIP-NEXT: vmv2r.v v18, v12
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: slli a2, a1, 2
+; ZVZIP-NEXT: add a1, a2, a1
+; ZVZIP-NEXT: add a1, sp, a1
+; ZVZIP-NEXT: addi a1, a1, 64
+; ZVZIP-NEXT: csrr a2, vlenb
+; ZVZIP-NEXT: vmv2r.v v16, v8
+; ZVZIP-NEXT: vmv2r.v v22, v16
+; ZVZIP-NEXT: vmv2r.v v24, v18
+; ZVZIP-NEXT: vmv1r.v v26, v20
+; ZVZIP-NEXT: add a3, a0, a2
+; ZVZIP-NEXT: vmv1r.v v23, v10
+; ZVZIP-NEXT: add a4, a1, a2
+; ZVZIP-NEXT: add a5, a4, a2
+; ZVZIP-NEXT: vmv1r.v v25, v14
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vmv1r.v v18, v11
+; ZVZIP-NEXT: vsseg5e16.v v22, (a0)
+; ZVZIP-NEXT: vmv1r.v v20, v15
+; ZVZIP-NEXT: vsseg5e16.v v17, (a1)
+; ZVZIP-NEXT: vl1re16.v v16, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re16.v v17, (a6)
+; ZVZIP-NEXT: add a6, a3, a2
+; ZVZIP-NEXT: vl1re16.v v10, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re16.v v11, (a6)
+; ZVZIP-NEXT: vl1re16.v v8, (a0)
+; ZVZIP-NEXT: vl1re16.v v9, (a3)
+; ZVZIP-NEXT: vl1re16.v v14, (a4)
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a3, 10
+; ZVZIP-NEXT: mul a0, a0, a3
+; ZVZIP-NEXT: add a0, sp, a0
+; ZVZIP-NEXT: addi a0, a0, 64
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re16.v v15, (a5)
+; ZVZIP-NEXT: vl1re16.v v12, (a6)
+; ZVZIP-NEXT: vl1re16.v v13, (a1)
+; ZVZIP-NEXT: slli a2, a2, 3
+; ZVZIP-NEXT: add a2, a0, a2
+; ZVZIP-NEXT: vs2r.v v16, (a2)
+; ZVZIP-NEXT: vs8r.v v8, (a0)
+; ZVZIP-NEXT: vl8re16.v v16, (a2)
+; ZVZIP-NEXT: vl8re16.v v8, (a0)
+; ZVZIP-NEXT: addi sp, s0, -80
+; ZVZIP-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: addi sp, sp, 80
+; ZVZIP-NEXT: ret
%res = call <vscale x 40 x half> @llvm.vector.interleave5.nxv40f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x half> %v4)
ret <vscale x 40 x half> %res
}
@@ -8838,6 +9866,72 @@ define <vscale x 40 x bfloat> @vector_interleave_nxv40bf16_nxv8bf16(<vscale x 8
; ZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; ZIP-NEXT: addi sp, sp, 80
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv40bf16_nxv8bf16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -80
+; ZVZIP-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: addi s0, sp, 80
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a1, 28
+; ZVZIP-NEXT: mul a0, a0, a1
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: andi sp, sp, -64
+; ZVZIP-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVZIP-NEXT: vmv2r.v v20, v16
+; ZVZIP-NEXT: addi a0, sp, 64
+; ZVZIP-NEXT: vmv2r.v v18, v12
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: slli a2, a1, 2
+; ZVZIP-NEXT: add a1, a2, a1
+; ZVZIP-NEXT: add a1, sp, a1
+; ZVZIP-NEXT: addi a1, a1, 64
+; ZVZIP-NEXT: csrr a2, vlenb
+; ZVZIP-NEXT: vmv2r.v v16, v8
+; ZVZIP-NEXT: vmv2r.v v22, v16
+; ZVZIP-NEXT: vmv2r.v v24, v18
+; ZVZIP-NEXT: vmv1r.v v26, v20
+; ZVZIP-NEXT: add a3, a0, a2
+; ZVZIP-NEXT: vmv1r.v v23, v10
+; ZVZIP-NEXT: add a4, a1, a2
+; ZVZIP-NEXT: add a5, a4, a2
+; ZVZIP-NEXT: vmv1r.v v25, v14
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vmv1r.v v18, v11
+; ZVZIP-NEXT: vsseg5e16.v v22, (a0)
+; ZVZIP-NEXT: vmv1r.v v20, v15
+; ZVZIP-NEXT: vsseg5e16.v v17, (a1)
+; ZVZIP-NEXT: vl1re16.v v16, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re16.v v17, (a6)
+; ZVZIP-NEXT: add a6, a3, a2
+; ZVZIP-NEXT: vl1re16.v v10, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re16.v v11, (a6)
+; ZVZIP-NEXT: vl1re16.v v8, (a0)
+; ZVZIP-NEXT: vl1re16.v v9, (a3)
+; ZVZIP-NEXT: vl1re16.v v14, (a4)
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a3, 10
+; ZVZIP-NEXT: mul a0, a0, a3
+; ZVZIP-NEXT: add a0, sp, a0
+; ZVZIP-NEXT: addi a0, a0, 64
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re16.v v15, (a5)
+; ZVZIP-NEXT: vl1re16.v v12, (a6)
+; ZVZIP-NEXT: vl1re16.v v13, (a1)
+; ZVZIP-NEXT: slli a2, a2, 3
+; ZVZIP-NEXT: add a2, a0, a2
+; ZVZIP-NEXT: vs2r.v v16, (a2)
+; ZVZIP-NEXT: vs8r.v v8, (a0)
+; ZVZIP-NEXT: vl8re16.v v16, (a2)
+; ZVZIP-NEXT: vl8re16.v v8, (a0)
+; ZVZIP-NEXT: addi sp, s0, -80
+; ZVZIP-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: addi sp, sp, 80
+; ZVZIP-NEXT: ret
%res = call <vscale x 40 x bfloat> @llvm.vector.interleave5.nxv40bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x bfloat> %v4)
ret <vscale x 40 x bfloat> %res
}
@@ -9304,6 +10398,72 @@ define <vscale x 20 x float> @vector_interleave_nxv20f32_nxv4f32(<vscale x 4 x f
; ZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; ZIP-NEXT: addi sp, sp, 80
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv20f32_nxv4f32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -80
+; ZVZIP-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: addi s0, sp, 80
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a1, 28
+; ZVZIP-NEXT: mul a0, a0, a1
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: andi sp, sp, -64
+; ZVZIP-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; ZVZIP-NEXT: vmv2r.v v20, v16
+; ZVZIP-NEXT: addi a0, sp, 64
+; ZVZIP-NEXT: vmv2r.v v18, v12
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: slli a2, a1, 2
+; ZVZIP-NEXT: add a1, a2, a1
+; ZVZIP-NEXT: add a1, sp, a1
+; ZVZIP-NEXT: addi a1, a1, 64
+; ZVZIP-NEXT: csrr a2, vlenb
+; ZVZIP-NEXT: vmv2r.v v16, v8
+; ZVZIP-NEXT: vmv2r.v v22, v16
+; ZVZIP-NEXT: vmv2r.v v24, v18
+; ZVZIP-NEXT: vmv1r.v v26, v20
+; ZVZIP-NEXT: add a3, a0, a2
+; ZVZIP-NEXT: vmv1r.v v23, v10
+; ZVZIP-NEXT: add a4, a1, a2
+; ZVZIP-NEXT: add a5, a4, a2
+; ZVZIP-NEXT: vmv1r.v v25, v14
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vmv1r.v v18, v11
+; ZVZIP-NEXT: vsseg5e32.v v22, (a0)
+; ZVZIP-NEXT: vmv1r.v v20, v15
+; ZVZIP-NEXT: vsseg5e32.v v17, (a1)
+; ZVZIP-NEXT: vl1re32.v v16, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re32.v v17, (a6)
+; ZVZIP-NEXT: add a6, a3, a2
+; ZVZIP-NEXT: vl1re32.v v10, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re32.v v11, (a6)
+; ZVZIP-NEXT: vl1re32.v v8, (a0)
+; ZVZIP-NEXT: vl1re32.v v9, (a3)
+; ZVZIP-NEXT: vl1re32.v v14, (a4)
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a3, 10
+; ZVZIP-NEXT: mul a0, a0, a3
+; ZVZIP-NEXT: add a0, sp, a0
+; ZVZIP-NEXT: addi a0, a0, 64
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re32.v v15, (a5)
+; ZVZIP-NEXT: vl1re32.v v12, (a6)
+; ZVZIP-NEXT: vl1re32.v v13, (a1)
+; ZVZIP-NEXT: slli a2, a2, 3
+; ZVZIP-NEXT: add a2, a0, a2
+; ZVZIP-NEXT: vs2r.v v16, (a2)
+; ZVZIP-NEXT: vs8r.v v8, (a0)
+; ZVZIP-NEXT: vl8re32.v v16, (a2)
+; ZVZIP-NEXT: vl8re32.v v8, (a0)
+; ZVZIP-NEXT: addi sp, s0, -80
+; ZVZIP-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: addi sp, sp, 80
+; ZVZIP-NEXT: ret
%res = call <vscale x 20 x float> @llvm.vector.interleave5.nxv20f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x float> %v4)
ret <vscale x 20 x float> %res
}
@@ -9696,6 +10856,72 @@ define <vscale x 10 x double> @vector_interleave_nxv10f64_nxv2f64(<vscale x 2 x
; ZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; ZIP-NEXT: addi sp, sp, 80
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv10f64_nxv2f64:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -80
+; ZVZIP-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: addi s0, sp, 80
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a1, 28
+; ZVZIP-NEXT: mul a0, a0, a1
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: andi sp, sp, -64
+; ZVZIP-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; ZVZIP-NEXT: vmv2r.v v20, v16
+; ZVZIP-NEXT: addi a0, sp, 64
+; ZVZIP-NEXT: vmv2r.v v18, v12
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: slli a2, a1, 2
+; ZVZIP-NEXT: add a1, a2, a1
+; ZVZIP-NEXT: add a1, sp, a1
+; ZVZIP-NEXT: addi a1, a1, 64
+; ZVZIP-NEXT: csrr a2, vlenb
+; ZVZIP-NEXT: vmv2r.v v16, v8
+; ZVZIP-NEXT: vmv2r.v v22, v16
+; ZVZIP-NEXT: vmv2r.v v24, v18
+; ZVZIP-NEXT: vmv1r.v v26, v20
+; ZVZIP-NEXT: add a3, a0, a2
+; ZVZIP-NEXT: vmv1r.v v23, v10
+; ZVZIP-NEXT: add a4, a1, a2
+; ZVZIP-NEXT: add a5, a4, a2
+; ZVZIP-NEXT: vmv1r.v v25, v14
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vmv1r.v v18, v11
+; ZVZIP-NEXT: vsseg5e64.v v22, (a0)
+; ZVZIP-NEXT: vmv1r.v v20, v15
+; ZVZIP-NEXT: vsseg5e64.v v17, (a1)
+; ZVZIP-NEXT: vl1re64.v v16, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re64.v v17, (a6)
+; ZVZIP-NEXT: add a6, a3, a2
+; ZVZIP-NEXT: vl1re64.v v10, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re64.v v11, (a6)
+; ZVZIP-NEXT: vl1re64.v v8, (a0)
+; ZVZIP-NEXT: vl1re64.v v9, (a3)
+; ZVZIP-NEXT: vl1re64.v v14, (a4)
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a3, 10
+; ZVZIP-NEXT: mul a0, a0, a3
+; ZVZIP-NEXT: add a0, sp, a0
+; ZVZIP-NEXT: addi a0, a0, 64
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re64.v v15, (a5)
+; ZVZIP-NEXT: vl1re64.v v12, (a6)
+; ZVZIP-NEXT: vl1re64.v v13, (a1)
+; ZVZIP-NEXT: slli a2, a2, 3
+; ZVZIP-NEXT: add a2, a0, a2
+; ZVZIP-NEXT: vs2r.v v16, (a2)
+; ZVZIP-NEXT: vs8r.v v8, (a0)
+; ZVZIP-NEXT: vl8re64.v v16, (a2)
+; ZVZIP-NEXT: vl8re64.v v8, (a0)
+; ZVZIP-NEXT: addi sp, s0, -80
+; ZVZIP-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: addi sp, sp, 80
+; ZVZIP-NEXT: ret
%res = call <vscale x 10 x double> @llvm.vector.interleave5.nxv10f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x double> %v4)
ret <vscale x 10 x double> %res
}
@@ -10209,6 +11435,79 @@ define <vscale x 48 x half> @vector_interleave_nxv48f16_nxv8f16(<vscale x 8 x ha
; ZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; ZIP-NEXT: addi sp, sp, 80
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv48f16_nxv8f16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -80
+; ZVZIP-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: addi s0, sp, 80
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a1, 28
+; ZVZIP-NEXT: mul a0, a0, a1
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: andi sp, sp, -64
+; ZVZIP-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVZIP-NEXT: vmv2r.v v20, v14
+; ZVZIP-NEXT: vmv2r.v v22, v12
+; ZVZIP-NEXT: vmv2r.v v24, v10
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: li a0, 6
+; ZVZIP-NEXT: mul a1, a1, a0
+; ZVZIP-NEXT: add a1, sp, a1
+; ZVZIP-NEXT: addi a1, a1, 64
+; ZVZIP-NEXT: vmv1r.v v10, v25
+; ZVZIP-NEXT: vmv1r.v v11, v23
+; ZVZIP-NEXT: vmv1r.v v12, v21
+; ZVZIP-NEXT: addi a0, sp, 64
+; ZVZIP-NEXT: vmv1r.v v13, v17
+; ZVZIP-NEXT: csrr a2, vlenb
+; ZVZIP-NEXT: vmv1r.v v14, v19
+; ZVZIP-NEXT: vsseg6e16.v v9, (a1)
+; ZVZIP-NEXT: vmv1r.v v9, v24
+; ZVZIP-NEXT: add a5, a1, a2
+; ZVZIP-NEXT: vmv1r.v v10, v22
+; ZVZIP-NEXT: add a3, a0, a2
+; ZVZIP-NEXT: vmv1r.v v11, v20
+; ZVZIP-NEXT: add a4, a3, a2
+; ZVZIP-NEXT: vmv1r.v v12, v16
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vmv1r.v v13, v18
+; ZVZIP-NEXT: vsseg6e16.v v8, (a0)
+; ZVZIP-NEXT: vl1re16.v v14, (a1)
+; ZVZIP-NEXT: add a1, a6, a2
+; ZVZIP-NEXT: vl1re16.v v15, (a5)
+; ZVZIP-NEXT: add a5, a1, a2
+; ZVZIP-NEXT: vl1re16.v v18, (a5)
+; ZVZIP-NEXT: add a5, a5, a2
+; ZVZIP-NEXT: vl1re16.v v19, (a5)
+; ZVZIP-NEXT: add a5, a4, a2
+; ZVZIP-NEXT: vl1re16.v v16, (a6)
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vl1re16.v v12, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re16.v v13, (a6)
+; ZVZIP-NEXT: csrr a6, vlenb
+; ZVZIP-NEXT: li a7, 12
+; ZVZIP-NEXT: mul a6, a6, a7
+; ZVZIP-NEXT: add a6, sp, a6
+; ZVZIP-NEXT: addi a6, a6, 64
+; ZVZIP-NEXT: vl1re16.v v17, (a1)
+; ZVZIP-NEXT: vl1re16.v v10, (a4)
+; ZVZIP-NEXT: vl1re16.v v11, (a5)
+; ZVZIP-NEXT: vl1re16.v v8, (a0)
+; ZVZIP-NEXT: vl1re16.v v9, (a3)
+; ZVZIP-NEXT: slli a2, a2, 3
+; ZVZIP-NEXT: add a2, a6, a2
+; ZVZIP-NEXT: vs4r.v v16, (a2)
+; ZVZIP-NEXT: vs8r.v v8, (a6)
+; ZVZIP-NEXT: vl8re16.v v16, (a2)
+; ZVZIP-NEXT: vl8re16.v v8, (a6)
+; ZVZIP-NEXT: addi sp, s0, -80
+; ZVZIP-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: addi sp, sp, 80
+; ZVZIP-NEXT: ret
%res = call <vscale x 48 x half> @llvm.vector.interleave6.nxv48f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x half> %v4, <vscale x 8 x half> %v5)
ret <vscale x 48 x half> %res
}
@@ -10722,6 +12021,79 @@ define <vscale x 48 x bfloat> @vector_interleave_nxv48bf16_nxv8bf16(<vscale x 8
; ZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; ZIP-NEXT: addi sp, sp, 80
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv48bf16_nxv8bf16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -80
+; ZVZIP-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: addi s0, sp, 80
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a1, 28
+; ZVZIP-NEXT: mul a0, a0, a1
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: andi sp, sp, -64
+; ZVZIP-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVZIP-NEXT: vmv2r.v v20, v14
+; ZVZIP-NEXT: vmv2r.v v22, v12
+; ZVZIP-NEXT: vmv2r.v v24, v10
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: li a0, 6
+; ZVZIP-NEXT: mul a1, a1, a0
+; ZVZIP-NEXT: add a1, sp, a1
+; ZVZIP-NEXT: addi a1, a1, 64
+; ZVZIP-NEXT: vmv1r.v v10, v25
+; ZVZIP-NEXT: vmv1r.v v11, v23
+; ZVZIP-NEXT: vmv1r.v v12, v21
+; ZVZIP-NEXT: addi a0, sp, 64
+; ZVZIP-NEXT: vmv1r.v v13, v17
+; ZVZIP-NEXT: csrr a2, vlenb
+; ZVZIP-NEXT: vmv1r.v v14, v19
+; ZVZIP-NEXT: vsseg6e16.v v9, (a1)
+; ZVZIP-NEXT: vmv1r.v v9, v24
+; ZVZIP-NEXT: add a5, a1, a2
+; ZVZIP-NEXT: vmv1r.v v10, v22
+; ZVZIP-NEXT: add a3, a0, a2
+; ZVZIP-NEXT: vmv1r.v v11, v20
+; ZVZIP-NEXT: add a4, a3, a2
+; ZVZIP-NEXT: vmv1r.v v12, v16
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vmv1r.v v13, v18
+; ZVZIP-NEXT: vsseg6e16.v v8, (a0)
+; ZVZIP-NEXT: vl1re16.v v14, (a1)
+; ZVZIP-NEXT: add a1, a6, a2
+; ZVZIP-NEXT: vl1re16.v v15, (a5)
+; ZVZIP-NEXT: add a5, a1, a2
+; ZVZIP-NEXT: vl1re16.v v18, (a5)
+; ZVZIP-NEXT: add a5, a5, a2
+; ZVZIP-NEXT: vl1re16.v v19, (a5)
+; ZVZIP-NEXT: add a5, a4, a2
+; ZVZIP-NEXT: vl1re16.v v16, (a6)
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vl1re16.v v12, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re16.v v13, (a6)
+; ZVZIP-NEXT: csrr a6, vlenb
+; ZVZIP-NEXT: li a7, 12
+; ZVZIP-NEXT: mul a6, a6, a7
+; ZVZIP-NEXT: add a6, sp, a6
+; ZVZIP-NEXT: addi a6, a6, 64
+; ZVZIP-NEXT: vl1re16.v v17, (a1)
+; ZVZIP-NEXT: vl1re16.v v10, (a4)
+; ZVZIP-NEXT: vl1re16.v v11, (a5)
+; ZVZIP-NEXT: vl1re16.v v8, (a0)
+; ZVZIP-NEXT: vl1re16.v v9, (a3)
+; ZVZIP-NEXT: slli a2, a2, 3
+; ZVZIP-NEXT: add a2, a6, a2
+; ZVZIP-NEXT: vs4r.v v16, (a2)
+; ZVZIP-NEXT: vs8r.v v8, (a6)
+; ZVZIP-NEXT: vl8re16.v v16, (a2)
+; ZVZIP-NEXT: vl8re16.v v8, (a6)
+; ZVZIP-NEXT: addi sp, s0, -80
+; ZVZIP-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: addi sp, sp, 80
+; ZVZIP-NEXT: ret
%res = call <vscale x 48 x bfloat> @llvm.vector.interleave6.nxv48bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x bfloat> %v4, <vscale x 8 x bfloat> %v5)
ret <vscale x 48 x bfloat> %res
}
@@ -11235,6 +12607,79 @@ define <vscale x 24 x float> @vector_interleave_nxv24f32_nxv4f32(<vscale x 4 x f
; ZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; ZIP-NEXT: addi sp, sp, 80
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv24f32_nxv4f32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -80
+; ZVZIP-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: addi s0, sp, 80
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a1, 28
+; ZVZIP-NEXT: mul a0, a0, a1
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: andi sp, sp, -64
+; ZVZIP-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; ZVZIP-NEXT: vmv2r.v v20, v14
+; ZVZIP-NEXT: vmv2r.v v22, v12
+; ZVZIP-NEXT: vmv2r.v v24, v10
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: li a0, 6
+; ZVZIP-NEXT: mul a1, a1, a0
+; ZVZIP-NEXT: add a1, sp, a1
+; ZVZIP-NEXT: addi a1, a1, 64
+; ZVZIP-NEXT: vmv1r.v v10, v25
+; ZVZIP-NEXT: vmv1r.v v11, v23
+; ZVZIP-NEXT: vmv1r.v v12, v21
+; ZVZIP-NEXT: addi a0, sp, 64
+; ZVZIP-NEXT: vmv1r.v v13, v17
+; ZVZIP-NEXT: csrr a2, vlenb
+; ZVZIP-NEXT: vmv1r.v v14, v19
+; ZVZIP-NEXT: vsseg6e32.v v9, (a1)
+; ZVZIP-NEXT: vmv1r.v v9, v24
+; ZVZIP-NEXT: add a5, a1, a2
+; ZVZIP-NEXT: vmv1r.v v10, v22
+; ZVZIP-NEXT: add a3, a0, a2
+; ZVZIP-NEXT: vmv1r.v v11, v20
+; ZVZIP-NEXT: add a4, a3, a2
+; ZVZIP-NEXT: vmv1r.v v12, v16
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vmv1r.v v13, v18
+; ZVZIP-NEXT: vsseg6e32.v v8, (a0)
+; ZVZIP-NEXT: vl1re32.v v14, (a1)
+; ZVZIP-NEXT: add a1, a6, a2
+; ZVZIP-NEXT: vl1re32.v v15, (a5)
+; ZVZIP-NEXT: add a5, a1, a2
+; ZVZIP-NEXT: vl1re32.v v18, (a5)
+; ZVZIP-NEXT: add a5, a5, a2
+; ZVZIP-NEXT: vl1re32.v v19, (a5)
+; ZVZIP-NEXT: add a5, a4, a2
+; ZVZIP-NEXT: vl1re32.v v16, (a6)
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vl1re32.v v12, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re32.v v13, (a6)
+; ZVZIP-NEXT: csrr a6, vlenb
+; ZVZIP-NEXT: li a7, 12
+; ZVZIP-NEXT: mul a6, a6, a7
+; ZVZIP-NEXT: add a6, sp, a6
+; ZVZIP-NEXT: addi a6, a6, 64
+; ZVZIP-NEXT: vl1re32.v v17, (a1)
+; ZVZIP-NEXT: vl1re32.v v10, (a4)
+; ZVZIP-NEXT: vl1re32.v v11, (a5)
+; ZVZIP-NEXT: vl1re32.v v8, (a0)
+; ZVZIP-NEXT: vl1re32.v v9, (a3)
+; ZVZIP-NEXT: slli a2, a2, 3
+; ZVZIP-NEXT: add a2, a6, a2
+; ZVZIP-NEXT: vs4r.v v16, (a2)
+; ZVZIP-NEXT: vs8r.v v8, (a6)
+; ZVZIP-NEXT: vl8re32.v v16, (a2)
+; ZVZIP-NEXT: vl8re32.v v8, (a6)
+; ZVZIP-NEXT: addi sp, s0, -80
+; ZVZIP-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: addi sp, sp, 80
+; ZVZIP-NEXT: ret
%res = call <vscale x 24 x float> @llvm.vector.interleave6.nxv24f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x float> %v4, <vscale x 4 x float> %v5)
ret <vscale x 24 x float> %res
}
@@ -11666,6 +13111,79 @@ define <vscale x 12 x double> @vector_interleave_nxv12f64_nxv2f64(<vscale x 2 x
; ZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; ZIP-NEXT: addi sp, sp, 80
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv12f64_nxv2f64:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -80
+; ZVZIP-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: addi s0, sp, 80
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a1, 28
+; ZVZIP-NEXT: mul a0, a0, a1
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: andi sp, sp, -64
+; ZVZIP-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; ZVZIP-NEXT: vmv2r.v v20, v14
+; ZVZIP-NEXT: vmv2r.v v22, v12
+; ZVZIP-NEXT: vmv2r.v v24, v10
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: li a0, 6
+; ZVZIP-NEXT: mul a1, a1, a0
+; ZVZIP-NEXT: add a1, sp, a1
+; ZVZIP-NEXT: addi a1, a1, 64
+; ZVZIP-NEXT: vmv1r.v v10, v25
+; ZVZIP-NEXT: vmv1r.v v11, v23
+; ZVZIP-NEXT: vmv1r.v v12, v21
+; ZVZIP-NEXT: addi a0, sp, 64
+; ZVZIP-NEXT: vmv1r.v v13, v17
+; ZVZIP-NEXT: csrr a2, vlenb
+; ZVZIP-NEXT: vmv1r.v v14, v19
+; ZVZIP-NEXT: vsseg6e64.v v9, (a1)
+; ZVZIP-NEXT: vmv1r.v v9, v24
+; ZVZIP-NEXT: add a5, a1, a2
+; ZVZIP-NEXT: vmv1r.v v10, v22
+; ZVZIP-NEXT: add a3, a0, a2
+; ZVZIP-NEXT: vmv1r.v v11, v20
+; ZVZIP-NEXT: add a4, a3, a2
+; ZVZIP-NEXT: vmv1r.v v12, v16
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vmv1r.v v13, v18
+; ZVZIP-NEXT: vsseg6e64.v v8, (a0)
+; ZVZIP-NEXT: vl1re64.v v14, (a1)
+; ZVZIP-NEXT: add a1, a6, a2
+; ZVZIP-NEXT: vl1re64.v v15, (a5)
+; ZVZIP-NEXT: add a5, a1, a2
+; ZVZIP-NEXT: vl1re64.v v18, (a5)
+; ZVZIP-NEXT: add a5, a5, a2
+; ZVZIP-NEXT: vl1re64.v v19, (a5)
+; ZVZIP-NEXT: add a5, a4, a2
+; ZVZIP-NEXT: vl1re64.v v16, (a6)
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vl1re64.v v12, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re64.v v13, (a6)
+; ZVZIP-NEXT: csrr a6, vlenb
+; ZVZIP-NEXT: li a7, 12
+; ZVZIP-NEXT: mul a6, a6, a7
+; ZVZIP-NEXT: add a6, sp, a6
+; ZVZIP-NEXT: addi a6, a6, 64
+; ZVZIP-NEXT: vl1re64.v v17, (a1)
+; ZVZIP-NEXT: vl1re64.v v10, (a4)
+; ZVZIP-NEXT: vl1re64.v v11, (a5)
+; ZVZIP-NEXT: vl1re64.v v8, (a0)
+; ZVZIP-NEXT: vl1re64.v v9, (a3)
+; ZVZIP-NEXT: slli a2, a2, 3
+; ZVZIP-NEXT: add a2, a6, a2
+; ZVZIP-NEXT: vs4r.v v16, (a2)
+; ZVZIP-NEXT: vs8r.v v8, (a6)
+; ZVZIP-NEXT: vl8re64.v v16, (a2)
+; ZVZIP-NEXT: vl8re64.v v8, (a6)
+; ZVZIP-NEXT: addi sp, s0, -80
+; ZVZIP-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: addi sp, sp, 80
+; ZVZIP-NEXT: ret
%res = call <vscale x 12 x double> @llvm.vector.interleave6.nxv12f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x double> %v4, <vscale x 2 x double> %v5)
ret <vscale x 12 x double> %res
}
@@ -12223,6 +13741,87 @@ define <vscale x 56 x half> @vector_interleave_nxv56f16_nxv8f16(<vscale x 8 x ha
; ZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; ZIP-NEXT: addi sp, sp, 80
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv56f16_nxv8f16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -80
+; ZVZIP-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: addi s0, sp, 80
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 5
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: andi sp, sp, -64
+; ZVZIP-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVZIP-NEXT: vmv2r.v v26, v20
+; ZVZIP-NEXT: addi a0, sp, 64
+; ZVZIP-NEXT: vmv2r.v v24, v16
+; ZVZIP-NEXT: vmv2r.v v22, v12
+; ZVZIP-NEXT: vmv2r.v v20, v8
+; ZVZIP-NEXT: vmv1r.v v1, v20
+; ZVZIP-NEXT: vmv1r.v v3, v22
+; ZVZIP-NEXT: vmv1r.v v5, v24
+; ZVZIP-NEXT: vmv1r.v v7, v26
+; ZVZIP-NEXT: vmv1r.v v2, v10
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: slli a2, a1, 3
+; ZVZIP-NEXT: sub a1, a2, a1
+; ZVZIP-NEXT: add a1, sp, a1
+; ZVZIP-NEXT: addi a1, a1, 64
+; ZVZIP-NEXT: csrr a2, vlenb
+; ZVZIP-NEXT: vmv1r.v v4, v14
+; ZVZIP-NEXT: add a3, a0, a2
+; ZVZIP-NEXT: add a4, a1, a2
+; ZVZIP-NEXT: vmv1r.v v6, v18
+; ZVZIP-NEXT: add a5, a4, a2
+; ZVZIP-NEXT: vmv1r.v v22, v11
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vmv1r.v v24, v15
+; ZVZIP-NEXT: vsseg7e16.v v1, (a0)
+; ZVZIP-NEXT: vmv1r.v v26, v19
+; ZVZIP-NEXT: vsseg7e16.v v21, (a1)
+; ZVZIP-NEXT: vl1re16.v v18, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re16.v v19, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re16.v v20, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re16.v v21, (a6)
+; ZVZIP-NEXT: add a6, a3, a2
+; ZVZIP-NEXT: vl1re16.v v10, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re16.v v11, (a6)
+; ZVZIP-NEXT: vl1re16.v v8, (a0)
+; ZVZIP-NEXT: vl1re16.v v16, (a4)
+; ZVZIP-NEXT: vl1re16.v v9, (a3)
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a3, 14
+; ZVZIP-NEXT: mul a0, a0, a3
+; ZVZIP-NEXT: add a0, sp, a0
+; ZVZIP-NEXT: addi a0, a0, 64
+; ZVZIP-NEXT: vl1re16.v v17, (a5)
+; ZVZIP-NEXT: slli a3, a2, 2
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re16.v v12, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re16.v v13, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: slli a2, a2, 3
+; ZVZIP-NEXT: add a3, a2, a3
+; ZVZIP-NEXT: add a2, a0, a2
+; ZVZIP-NEXT: vl1re16.v v14, (a6)
+; ZVZIP-NEXT: vl1re16.v v15, (a1)
+; ZVZIP-NEXT: add a3, a0, a3
+; ZVZIP-NEXT: vs2r.v v20, (a3)
+; ZVZIP-NEXT: vs4r.v v16, (a2)
+; ZVZIP-NEXT: vs8r.v v8, (a0)
+; ZVZIP-NEXT: vl8re16.v v16, (a2)
+; ZVZIP-NEXT: vl8re16.v v8, (a0)
+; ZVZIP-NEXT: addi sp, s0, -80
+; ZVZIP-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: addi sp, sp, 80
+; ZVZIP-NEXT: ret
%res = call <vscale x 56 x half> @llvm.vector.interleave7.nxv56f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x half> %v4, <vscale x 8 x half> %v5, <vscale x 8 x half> %v6)
ret <vscale x 56 x half> %res
}
@@ -12780,6 +14379,87 @@ define <vscale x 56 x bfloat> @vector_interleave_nxv56bf16_nxv8bf16(<vscale x 8
; ZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; ZIP-NEXT: addi sp, sp, 80
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv56bf16_nxv8bf16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -80
+; ZVZIP-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: addi s0, sp, 80
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 5
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: andi sp, sp, -64
+; ZVZIP-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVZIP-NEXT: vmv2r.v v26, v20
+; ZVZIP-NEXT: addi a0, sp, 64
+; ZVZIP-NEXT: vmv2r.v v24, v16
+; ZVZIP-NEXT: vmv2r.v v22, v12
+; ZVZIP-NEXT: vmv2r.v v20, v8
+; ZVZIP-NEXT: vmv1r.v v1, v20
+; ZVZIP-NEXT: vmv1r.v v3, v22
+; ZVZIP-NEXT: vmv1r.v v5, v24
+; ZVZIP-NEXT: vmv1r.v v7, v26
+; ZVZIP-NEXT: vmv1r.v v2, v10
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: slli a2, a1, 3
+; ZVZIP-NEXT: sub a1, a2, a1
+; ZVZIP-NEXT: add a1, sp, a1
+; ZVZIP-NEXT: addi a1, a1, 64
+; ZVZIP-NEXT: csrr a2, vlenb
+; ZVZIP-NEXT: vmv1r.v v4, v14
+; ZVZIP-NEXT: add a3, a0, a2
+; ZVZIP-NEXT: add a4, a1, a2
+; ZVZIP-NEXT: vmv1r.v v6, v18
+; ZVZIP-NEXT: add a5, a4, a2
+; ZVZIP-NEXT: vmv1r.v v22, v11
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vmv1r.v v24, v15
+; ZVZIP-NEXT: vsseg7e16.v v1, (a0)
+; ZVZIP-NEXT: vmv1r.v v26, v19
+; ZVZIP-NEXT: vsseg7e16.v v21, (a1)
+; ZVZIP-NEXT: vl1re16.v v18, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re16.v v19, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re16.v v20, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re16.v v21, (a6)
+; ZVZIP-NEXT: add a6, a3, a2
+; ZVZIP-NEXT: vl1re16.v v10, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re16.v v11, (a6)
+; ZVZIP-NEXT: vl1re16.v v8, (a0)
+; ZVZIP-NEXT: vl1re16.v v16, (a4)
+; ZVZIP-NEXT: vl1re16.v v9, (a3)
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a3, 14
+; ZVZIP-NEXT: mul a0, a0, a3
+; ZVZIP-NEXT: add a0, sp, a0
+; ZVZIP-NEXT: addi a0, a0, 64
+; ZVZIP-NEXT: vl1re16.v v17, (a5)
+; ZVZIP-NEXT: slli a3, a2, 2
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re16.v v12, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re16.v v13, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: slli a2, a2, 3
+; ZVZIP-NEXT: add a3, a2, a3
+; ZVZIP-NEXT: add a2, a0, a2
+; ZVZIP-NEXT: vl1re16.v v14, (a6)
+; ZVZIP-NEXT: vl1re16.v v15, (a1)
+; ZVZIP-NEXT: add a3, a0, a3
+; ZVZIP-NEXT: vs2r.v v20, (a3)
+; ZVZIP-NEXT: vs4r.v v16, (a2)
+; ZVZIP-NEXT: vs8r.v v8, (a0)
+; ZVZIP-NEXT: vl8re16.v v16, (a2)
+; ZVZIP-NEXT: vl8re16.v v8, (a0)
+; ZVZIP-NEXT: addi sp, s0, -80
+; ZVZIP-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: addi sp, sp, 80
+; ZVZIP-NEXT: ret
%res = call <vscale x 56 x bfloat> @llvm.vector.interleave7.nxv56bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x bfloat> %v4, <vscale x 8 x bfloat> %v5, <vscale x 8 x bfloat> %v6)
ret <vscale x 56 x bfloat> %res
}
@@ -13337,6 +15017,87 @@ define <vscale x 28 x float> @vector_interleave_nxv28f32_nxv4f32(<vscale x 4 x f
; ZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; ZIP-NEXT: addi sp, sp, 80
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv28f32_nxv4f32:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -80
+; ZVZIP-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: addi s0, sp, 80
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 5
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: andi sp, sp, -64
+; ZVZIP-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; ZVZIP-NEXT: vmv2r.v v26, v20
+; ZVZIP-NEXT: addi a0, sp, 64
+; ZVZIP-NEXT: vmv2r.v v24, v16
+; ZVZIP-NEXT: vmv2r.v v22, v12
+; ZVZIP-NEXT: vmv2r.v v20, v8
+; ZVZIP-NEXT: vmv1r.v v1, v20
+; ZVZIP-NEXT: vmv1r.v v3, v22
+; ZVZIP-NEXT: vmv1r.v v5, v24
+; ZVZIP-NEXT: vmv1r.v v7, v26
+; ZVZIP-NEXT: vmv1r.v v2, v10
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: slli a2, a1, 3
+; ZVZIP-NEXT: sub a1, a2, a1
+; ZVZIP-NEXT: add a1, sp, a1
+; ZVZIP-NEXT: addi a1, a1, 64
+; ZVZIP-NEXT: csrr a2, vlenb
+; ZVZIP-NEXT: vmv1r.v v4, v14
+; ZVZIP-NEXT: add a3, a0, a2
+; ZVZIP-NEXT: add a4, a1, a2
+; ZVZIP-NEXT: vmv1r.v v6, v18
+; ZVZIP-NEXT: add a5, a4, a2
+; ZVZIP-NEXT: vmv1r.v v22, v11
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vmv1r.v v24, v15
+; ZVZIP-NEXT: vsseg7e32.v v1, (a0)
+; ZVZIP-NEXT: vmv1r.v v26, v19
+; ZVZIP-NEXT: vsseg7e32.v v21, (a1)
+; ZVZIP-NEXT: vl1re32.v v18, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re32.v v19, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re32.v v20, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re32.v v21, (a6)
+; ZVZIP-NEXT: add a6, a3, a2
+; ZVZIP-NEXT: vl1re32.v v10, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re32.v v11, (a6)
+; ZVZIP-NEXT: vl1re32.v v8, (a0)
+; ZVZIP-NEXT: vl1re32.v v16, (a4)
+; ZVZIP-NEXT: vl1re32.v v9, (a3)
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a3, 14
+; ZVZIP-NEXT: mul a0, a0, a3
+; ZVZIP-NEXT: add a0, sp, a0
+; ZVZIP-NEXT: addi a0, a0, 64
+; ZVZIP-NEXT: vl1re32.v v17, (a5)
+; ZVZIP-NEXT: slli a3, a2, 2
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re32.v v12, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re32.v v13, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: slli a2, a2, 3
+; ZVZIP-NEXT: add a3, a2, a3
+; ZVZIP-NEXT: add a2, a0, a2
+; ZVZIP-NEXT: vl1re32.v v14, (a6)
+; ZVZIP-NEXT: vl1re32.v v15, (a1)
+; ZVZIP-NEXT: add a3, a0, a3
+; ZVZIP-NEXT: vs2r.v v20, (a3)
+; ZVZIP-NEXT: vs4r.v v16, (a2)
+; ZVZIP-NEXT: vs8r.v v8, (a0)
+; ZVZIP-NEXT: vl8re32.v v16, (a2)
+; ZVZIP-NEXT: vl8re32.v v8, (a0)
+; ZVZIP-NEXT: addi sp, s0, -80
+; ZVZIP-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: addi sp, sp, 80
+; ZVZIP-NEXT: ret
%res = call <vscale x 28 x float> @llvm.vector.interleave7.nxv28f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x float> %v4, <vscale x 4 x float> %v5, <vscale x 4 x float> %v6)
ret <vscale x 28 x float> %res
}
@@ -13812,6 +15573,87 @@ define <vscale x 14 x double> @vector_interleave_nxv14f64_nxv2f64(<vscale x 2 x
; ZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; ZIP-NEXT: addi sp, sp, 80
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: vector_interleave_nxv14f64_nxv2f64:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: addi sp, sp, -80
+; ZVZIP-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVZIP-NEXT: addi s0, sp, 80
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: slli a0, a0, 5
+; ZVZIP-NEXT: sub sp, sp, a0
+; ZVZIP-NEXT: andi sp, sp, -64
+; ZVZIP-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; ZVZIP-NEXT: vmv2r.v v26, v20
+; ZVZIP-NEXT: addi a0, sp, 64
+; ZVZIP-NEXT: vmv2r.v v24, v16
+; ZVZIP-NEXT: vmv2r.v v22, v12
+; ZVZIP-NEXT: vmv2r.v v20, v8
+; ZVZIP-NEXT: vmv1r.v v1, v20
+; ZVZIP-NEXT: vmv1r.v v3, v22
+; ZVZIP-NEXT: vmv1r.v v5, v24
+; ZVZIP-NEXT: vmv1r.v v7, v26
+; ZVZIP-NEXT: vmv1r.v v2, v10
+; ZVZIP-NEXT: csrr a1, vlenb
+; ZVZIP-NEXT: slli a2, a1, 3
+; ZVZIP-NEXT: sub a1, a2, a1
+; ZVZIP-NEXT: add a1, sp, a1
+; ZVZIP-NEXT: addi a1, a1, 64
+; ZVZIP-NEXT: csrr a2, vlenb
+; ZVZIP-NEXT: vmv1r.v v4, v14
+; ZVZIP-NEXT: add a3, a0, a2
+; ZVZIP-NEXT: add a4, a1, a2
+; ZVZIP-NEXT: vmv1r.v v6, v18
+; ZVZIP-NEXT: add a5, a4, a2
+; ZVZIP-NEXT: vmv1r.v v22, v11
+; ZVZIP-NEXT: add a6, a5, a2
+; ZVZIP-NEXT: vmv1r.v v24, v15
+; ZVZIP-NEXT: vsseg7e64.v v1, (a0)
+; ZVZIP-NEXT: vmv1r.v v26, v19
+; ZVZIP-NEXT: vsseg7e64.v v21, (a1)
+; ZVZIP-NEXT: vl1re64.v v18, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re64.v v19, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re64.v v20, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re64.v v21, (a6)
+; ZVZIP-NEXT: add a6, a3, a2
+; ZVZIP-NEXT: vl1re64.v v10, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re64.v v11, (a6)
+; ZVZIP-NEXT: vl1re64.v v8, (a0)
+; ZVZIP-NEXT: vl1re64.v v16, (a4)
+; ZVZIP-NEXT: vl1re64.v v9, (a3)
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: li a3, 14
+; ZVZIP-NEXT: mul a0, a0, a3
+; ZVZIP-NEXT: add a0, sp, a0
+; ZVZIP-NEXT: addi a0, a0, 64
+; ZVZIP-NEXT: vl1re64.v v17, (a5)
+; ZVZIP-NEXT: slli a3, a2, 2
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re64.v v12, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: vl1re64.v v13, (a6)
+; ZVZIP-NEXT: add a6, a6, a2
+; ZVZIP-NEXT: slli a2, a2, 3
+; ZVZIP-NEXT: add a3, a2, a3
+; ZVZIP-NEXT: add a2, a0, a2
+; ZVZIP-NEXT: vl1re64.v v14, (a6)
+; ZVZIP-NEXT: vl1re64.v v15, (a1)
+; ZVZIP-NEXT: add a3, a0, a3
+; ZVZIP-NEXT: vs2r.v v20, (a3)
+; ZVZIP-NEXT: vs4r.v v16, (a2)
+; ZVZIP-NEXT: vs8r.v v8, (a0)
+; ZVZIP-NEXT: vl8re64.v v16, (a2)
+; ZVZIP-NEXT: vl8re64.v v8, (a0)
+; ZVZIP-NEXT: addi sp, s0, -80
+; ZVZIP-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVZIP-NEXT: addi sp, sp, 80
+; ZVZIP-NEXT: ret
%res = call <vscale x 14 x double> @llvm.vector.interleave7.nxv14f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x double> %v4, <vscale x 2 x double> %v5, <vscale x 2 x double> %v6)
ret <vscale x 14 x double> %res
}
@@ -14966,6 +16808,19 @@ define <vscale x 4 x i16> @interleave2_diff_const_splat_nxv4i16() {
; ZIP-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; ZIP-NEXT: vslideup.vx v8, v11, a0
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: interleave2_diff_const_splat_nxv4i16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVZIP-NEXT: vmv.v.i v9, 4
+; ZVZIP-NEXT: vmv.v.i v10, 3
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: vzip.vv v8, v10, v9
+; ZVZIP-NEXT: srli a0, a0, 2
+; ZVZIP-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVZIP-NEXT: vslidedown.vx v9, v8, a0
+; ZVZIP-NEXT: vslideup.vx v8, v9, a0
+; ZVZIP-NEXT: ret
%retval = call <vscale x 4 x i16> @llvm.vector.interleave2.v4i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 4))
ret <vscale x 4 x i16> %retval
}
@@ -15028,6 +16883,19 @@ define <vscale x 4 x i16> @interleave2_diff_nonconst_splat_nxv4i16(i16 %a, i16 %
; ZIP-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; ZIP-NEXT: vslideup.vx v8, v11, a0
; ZIP-NEXT: ret
+;
+; ZVZIP-LABEL: interleave2_diff_nonconst_splat_nxv4i16:
+; ZVZIP: # %bb.0:
+; ZVZIP-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; ZVZIP-NEXT: vmv.v.x v9, a0
+; ZVZIP-NEXT: vmv.v.x v10, a1
+; ZVZIP-NEXT: csrr a0, vlenb
+; ZVZIP-NEXT: vzip.vv v8, v9, v10
+; ZVZIP-NEXT: srli a0, a0, 2
+; ZVZIP-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVZIP-NEXT: vslidedown.vx v9, v8, a0
+; ZVZIP-NEXT: vslideup.vx v8, v9, a0
+; ZVZIP-NEXT: ret
%ins1 = insertelement <vscale x 2 x i16> poison, i16 %a, i32 0
%splat1 = shufflevector <vscale x 2 x i16> %ins1, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
%ins2 = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
>From 2a5e83896f49e7909304d17d91879a9c91f7b25b Mon Sep 17 00:00:00 2001
From: wangboyao <wangboyao at bytedance.com>
Date: Fri, 3 Apr 2026 15:01:35 +0800
Subject: [PATCH 2/4] document Zvzip SDTypeProfiles, simplify LMUL checks, and
revert lowering helpers to static functions
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 160 +++++++++---------
llvm/lib/Target/RISCV/RISCVISelLowering.h | 6 +-
llvm/lib/Target/RISCV/RISCVInstrInfoZvzip.td | 5 +
.../fixed-vectors-shuffle-deinterleave2.ll | 54 +++---
4 files changed, 109 insertions(+), 116 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f38f193d5ffcc..19d7d3e9b1fc3 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5207,14 +5207,13 @@ static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2) {
return SDValue();
}
-bool RISCVTargetLowering::isLegalVTForZvzip(MVT VT) const {
+static bool isLegalVTForZvzip(MVT VT, const RISCVSubtarget &Subtarget,
+ const TargetLowering &TLI) {
MVT ContainerVT = VT;
if (VT.isFixedLengthVector())
- ContainerVT = getContainerForFixedLengthVector(VT);
+ ContainerVT = getContainerForFixedLengthVector(TLI, VT, Subtarget);
// Determine LMUL of the container vector.
- if (llvm::RISCVTargetLowering::getLMUL(ContainerVT) == RISCVVType::LMUL_8)
- return false;
- return true;
+ return RISCVTargetLowering::getLMUL(ContainerVT) != RISCVVType::LMUL_8;
}
/// Is this shuffle interleaving contiguous elements from one vector into the
@@ -5223,15 +5222,15 @@ bool RISCVTargetLowering::isLegalVTForZvzip(MVT VT) const {
/// even element. \p OddSrc will contain the element that should be in the first
/// odd element. These can be the first element in a source or the element half
/// way through the source.
-bool RISCVTargetLowering::isInterleaveShuffle(
- ArrayRef<int> Mask, MVT VT, int &EvenSrc, int &OddSrc,
- const RISCVSubtarget &Subtarget) const {
+static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
+ int &OddSrc, const RISCVSubtarget &Subtarget,
+ const TargetLowering &TLI) {
// We need to be able to widen elements to the next larger integer type or
- // use the zip2a instruction at e64.
+ // use the zip2a/vzip instruction at e64.
if (VT.getScalarSizeInBits() >= Subtarget.getELen()) {
if (!Subtarget.hasVendorXRivosVizip() && !Subtarget.hasStdExtZvzip())
return false;
- if (Subtarget.hasStdExtZvzip() && !isLegalVTForZvzip(VT))
+ if (Subtarget.hasStdExtZvzip() && !isLegalVTForZvzip(VT, Subtarget, TLI))
return false;
}
@@ -5765,12 +5764,16 @@ static SDValue lowerZvzipVUNZIP(unsigned Opc, SDValue Op, const SDLoc &DL,
MVT ContainerVT = IntVT;
if (VT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(DAG, IntVT, Subtarget);
+ // For E64 with LMUL <= 1, we can't represent a smaller fractional LMUL for
+ // the result (LMUL <= 1/2 is not valid for E64). We must widen the input
+ // container to at least LMUL=2 so the result can be LMUL=1.
+ if (ContainerVT.getVectorElementType() == MVT::i64 &&
+ RISCVTargetLowering::getLMUL(ContainerVT) == RISCVVType::LMUL_1) {
+ ContainerVT = MVT::getScalableVectorVT(MVT::i64, 2);
+ }
Op = convertToScalableVector(ContainerVT, Op, DAG, Subtarget);
}
- // We can't handle E64 with LMUL <= 1.
- if (ContainerVT.getVectorElementType() == MVT::i64 &&
- RISCVTargetLowering::getLMUL(ContainerVT) == RISCVVType::LMUL_1)
- return SDValue();
+
MVT ResVT = ContainerVT.getHalfNumVectorElementsVT();
MVT HalfVT = VT.getHalfNumVectorElementsVT();
MVT HalfIntVT = IntVT.getHalfNumVectorElementsVT();
@@ -6285,8 +6288,8 @@ static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
return DAG.getBitcast(VT, DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
}
-SDValue RISCVTargetLowering::lowerVECTOR_SHUFFLE(
- SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) const {
+SDValue RISCVTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
+ SelectionDAG &DAG) const {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
SDLoc DL(Op);
@@ -6313,7 +6316,7 @@ SDValue RISCVTargetLowering::lowerVECTOR_SHUFFLE(
ISD::SETNE);
}
- MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
+ MVT ContainerVT = getContainerForFixedLengthVector(VT);
// Store the return value in a single variable instead of structured bindings
// so that we can pass it to GetSlide below, which cannot capture structured
@@ -6481,24 +6484,26 @@ SDValue RISCVTargetLowering::lowerVECTOR_SHUFFLE(
if ((Subtarget.hasVendorXRivosVizip() || Subtarget.hasStdExtZvzip()) &&
ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, 2, Index) &&
1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
- if (Subtarget.hasStdExtZvzip() && isLegalVTForZvzip(VT)) {
+ if (Subtarget.hasStdExtZvzip() && isLegalVTForZvzip(VT, Subtarget, *this)) {
unsigned Opc = Index == 0 ? RISCVISD::VUNZIPE_VL : RISCVISD::VUNZIPO_VL;
EVT NewVT = VT.getDoubleNumVectorElementsVT();
- SDValue Op;
- if (V2.isUndef()) {
- Op = DAG.getNode(ISD::CONCAT_VECTORS, DL, NewVT, V1, V2);
- }
- if (auto VLEN = Subtarget.getRealVLen();
- VLEN && VT.getSizeInBits().getKnownMinValue() % *VLEN == 0)
- if (SDValue V = lowerZvzipVUNZIP(
- Opc, DAG.getNode(ISD::CONCAT_VECTORS, DL, NewVT, V1, V2), DL,
- DAG, Subtarget))
- return V;
- if (SDValue Src = foldConcatVector(V1, V2))
- Op = DAG.getExtractSubvector(DL, NewVT, Src, 0);
- if (Op) {
- if (SDValue Res = lowerZvzipVUNZIP(Opc, Op, DL, DAG, Subtarget))
- return DAG.getExtractSubvector(DL, VT, Res, 0);
+ if (isTypeLegal(NewVT)) {
+ SDValue Op;
+ if (V2.isUndef()) {
+ Op = DAG.getNode(ISD::CONCAT_VECTORS, DL, NewVT, V1, V2);
+ }
+ if (auto VLEN = Subtarget.getRealVLen();
+ VLEN && VT.getSizeInBits().getKnownMinValue() % *VLEN == 0)
+ if (SDValue V = lowerZvzipVUNZIP(
+ Opc, DAG.getNode(ISD::CONCAT_VECTORS, DL, NewVT, V1, V2), DL,
+ DAG, Subtarget))
+ return V;
+ if (SDValue Src = foldConcatVector(V1, V2))
+ Op = DAG.getExtractSubvector(DL, NewVT, Src, 0);
+ if (Op) {
+ if (SDValue Res = lowerZvzipVUNZIP(Opc, Op, DL, DAG, Subtarget))
+ return DAG.getExtractSubvector(DL, VT, Res, 0);
+ }
}
// Deinterleave each source and concatenate them, or concat first, then
@@ -6512,54 +6517,49 @@ SDValue RISCVTargetLowering::lowerVECTOR_SHUFFLE(
if (Lo && Hi)
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
}
- goto fallback;
- }
-
- if (!Subtarget.hasVendorXRivosVizip())
- goto fallback;
-
- unsigned Opc =
- Index == 0 ? RISCVISD::RI_VUNZIP2A_VL : RISCVISD::RI_VUNZIP2B_VL;
- if (V2.isUndef())
- return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
- if (auto VLEN = Subtarget.getRealVLen();
- VLEN && VT.getSizeInBits().getKnownMinValue() % *VLEN == 0)
- return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
- if (SDValue Src = foldConcatVector(V1, V2)) {
- EVT NewVT = VT.getDoubleNumVectorElementsVT();
- Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
- SDValue Res =
- lowerVZIP(Opc, Src, DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
- return DAG.getExtractSubvector(DL, VT, Res, 0);
- }
- // Deinterleave each source and concatenate them, or concat first, then
- // deinterleave.
- if (1 < count_if(Mask,
- [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
- 1 < count_if(Mask,
- [&Mask](int Idx) { return Idx >= (int)Mask.size(); })) {
-
- const unsigned EltSize = VT.getScalarSizeInBits();
- const unsigned MinVLMAX = Subtarget.getRealMinVLen() / EltSize;
- if (NumElts < MinVLMAX) {
- MVT ConcatVT = VT.getDoubleNumVectorElementsVT();
- SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
+ } else if (Subtarget.hasVendorXRivosVizip()) {
+ unsigned Opc =
+ Index == 0 ? RISCVISD::RI_VUNZIP2A_VL : RISCVISD::RI_VUNZIP2B_VL;
+ if (V2.isUndef())
+ return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
+ if (auto VLEN = Subtarget.getRealVLen();
+ VLEN && VT.getSizeInBits().getKnownMinValue() % *VLEN == 0)
+ return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
+ if (SDValue Src = foldConcatVector(V1, V2)) {
+ EVT NewVT = VT.getDoubleNumVectorElementsVT();
+ Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
SDValue Res =
- lowerVZIP(Opc, Concat, DAG.getUNDEF(ConcatVT), DL, DAG, Subtarget);
+ lowerVZIP(Opc, Src, DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
return DAG.getExtractSubvector(DL, VT, Res, 0);
}
+ // Deinterleave each source and concatenate them, or concat first, then
+ // deinterleave.
+ if (1 < count_if(Mask,
+ [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
+ 1 < count_if(Mask,
+ [&Mask](int Idx) { return Idx >= (int)Mask.size(); })) {
- SDValue Lo = lowerVZIP(Opc, V1, DAG.getUNDEF(VT), DL, DAG, Subtarget);
- SDValue Hi = lowerVZIP(Opc, V2, DAG.getUNDEF(VT), DL, DAG, Subtarget);
+ const unsigned EltSize = VT.getScalarSizeInBits();
+ const unsigned MinVLMAX = Subtarget.getRealMinVLen() / EltSize;
+ if (NumElts < MinVLMAX) {
+ MVT ConcatVT = VT.getDoubleNumVectorElementsVT();
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
+ SDValue Res =
+ lowerVZIP(Opc, Concat, DAG.getUNDEF(ConcatVT), DL, DAG, Subtarget);
+ return DAG.getExtractSubvector(DL, VT, Res, 0);
+ }
- MVT SubVT = VT.getHalfNumVectorElementsVT();
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT,
- DAG.getExtractSubvector(DL, SubVT, Lo, 0),
- DAG.getExtractSubvector(DL, SubVT, Hi, 0));
+ SDValue Lo = lowerVZIP(Opc, V1, DAG.getUNDEF(VT), DL, DAG, Subtarget);
+ SDValue Hi = lowerVZIP(Opc, V2, DAG.getUNDEF(VT), DL, DAG, Subtarget);
+
+ MVT SubVT = VT.getHalfNumVectorElementsVT();
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT,
+ DAG.getExtractSubvector(DL, SubVT, Lo, 0),
+ DAG.getExtractSubvector(DL, SubVT, Hi, 0));
+ }
}
}
-fallback:
if (SDValue V =
lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
return V;
@@ -6567,7 +6567,7 @@ SDValue RISCVTargetLowering::lowerVECTOR_SHUFFLE(
// Detect an interleave shuffle and lower to
// (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
int EvenSrc, OddSrc;
- if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget) &&
+ if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget, *this) &&
!(NumElts == 2 &&
ShuffleVectorInst::isSingleSourceMask(Mask, Mask.size()))) {
// Extract the halves of the vectors.
@@ -6599,8 +6599,8 @@ SDValue RISCVTargetLowering::lowerVECTOR_SHUFFLE(
}
// Prefer vzip2a or vzip if available.
- // TODO: Extend to matching zip2b or vzip if EvenSrc and OddSrc allow.
- if (Subtarget.hasStdExtZvzip() && isLegalVTForZvzip(VT)) {
+ // TODO: Extend to matching ri.vzip2b or vzip if EvenSrc and OddSrc allow.
+ if (Subtarget.hasStdExtZvzip() && isLegalVTForZvzip(VT, Subtarget, *this)) {
EvenV = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
EvenV, DAG.getVectorIdxConstant(0, DL));
OddV = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), OddV,
@@ -7005,7 +7005,7 @@ bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
return ShuffleVectorInst::isReverseMask(M, NumElts) ||
(::isMaskedSlidePair(M, SrcInfo) &&
isElementRotate(SrcInfo, NumElts)) ||
- isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
+ isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget, *this);
}
// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
@@ -8672,7 +8672,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return SDValue();
}
case ISD::VECTOR_SHUFFLE:
- return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
+ return lowerVECTOR_SHUFFLE(Op, DAG);
case ISD::CONCAT_VECTORS: {
// Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
// better than going through the stack, as the default expansion does.
@@ -13119,7 +13119,7 @@ SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
if (Subtarget.hasStdExtZvzip() && Factor == 2) {
MVT VT = Op->getSimpleValueType(0);
- if (isLegalVTForZvzip(VT)) {
+ if (isLegalVTForZvzip(VT, Subtarget, *this)) {
SDValue V1 = Op->getOperand(0);
SDValue V2 = Op->getOperand(1);
// Freeze the sources so we can increase their use count.
@@ -13400,7 +13400,7 @@ SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
if (Subtarget.hasStdExtZvzip() && !Op.getOperand(0).isUndef() &&
!Op.getOperand(1).isUndef()) {
MVT VT = Op->getSimpleValueType(0);
- if (isLegalVTForZvzip(VT)) {
+ if (isLegalVTForZvzip(VT, Subtarget, *this)) {
// Freeze the sources so we can increase their use count.
SDValue V1 = DAG.getFreeze(Op->getOperand(0));
SDValue V2 = DAG.getFreeze(Op->getOperand(1));
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 1f5cc09abc11e..44601984bd6a2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -583,11 +583,7 @@ class RISCVTargetLowering : public TargetLowering {
SDValue lowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerPARTIAL_REDUCE_MLA(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) const;
- bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
- int &OddSrc, const RISCVSubtarget &Subtarget) const;
- bool isLegalVTForZvzip(MVT VT) const;
+ SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerXAndesBfHCvtBFloat16Load(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerXAndesBfHCvtBFloat16Store(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvzip.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvzip.td
index ab507f54c3b90..a3a524d67142a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvzip.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvzip.td
@@ -30,6 +30,8 @@ let Predicates = [HasStdExtZvzip], Constraints = "@earlyclobber $vd" in {
}
} // Predicates = [HasStdExtZvzip]
+// Returns a vector. Operand 0 and 1 are vectors, operand 2 is a passthru,
+// operand 3 is a mask, operand 4 is VL.
def SDT_RISCVZip_VL : SDTypeProfile<1, 5, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisInt<0>, SDTCisInt<1>,
SDTCisSubVecOfVec<1, 0>,
@@ -38,6 +40,9 @@ def SDT_RISCVZip_VL : SDTypeProfile<1, 5, [SDTCisVec<0>, SDTCisVec<1>,
SDTCVecEltisVT<4, i1>,
SDTCisSameNumEltsAs<1, 4>,
SDTCisVT<5, XLenVT>]>;
+
+// Returns a vector. Operand 0 is a vector, operand 1 is a passthru,
+// operand 2 is a mask, operand 3 is VL.
def SDT_RISCVUnzip_VL : SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisInt<0>, SDTCisInt<1>,
SDTCisSubVecOfVec<0, 1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll
index c31c283460c61..4dab01b3dfa9a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll
@@ -433,9 +433,8 @@ define void @vnsrl_0_i64(ptr %in, ptr %out) {
; ZVZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; ZVZIP-NEXT: vle64.v v8, (a0)
; ZVZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; ZVZIP-NEXT: vslidedown.vi v9, v8, 2
-; ZVZIP-NEXT: vslideup.vi v8, v9, 1
-; ZVZIP-NEXT: vse64.v v8, (a1)
+; ZVZIP-NEXT: vunzipe.v v10, v8
+; ZVZIP-NEXT: vse64.v v10, (a1)
; ZVZIP-NEXT: ret
entry:
%0 = load <4 x i64>, ptr %in, align 8
@@ -478,8 +477,7 @@ define void @vnsrl_64_i64(ptr %in, ptr %out) {
; ZVZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; ZVZIP-NEXT: vle64.v v8, (a0)
; ZVZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; ZVZIP-NEXT: vslidedown.vi v9, v8, 2
-; ZVZIP-NEXT: vpairo.vv v10, v8, v9
+; ZVZIP-NEXT: vunzipo.v v10, v8
; ZVZIP-NEXT: vse64.v v10, (a1)
; ZVZIP-NEXT: ret
entry:
@@ -522,9 +520,8 @@ define void @vnsrl_0_double(ptr %in, ptr %out) {
; ZVZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; ZVZIP-NEXT: vle64.v v8, (a0)
; ZVZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; ZVZIP-NEXT: vslidedown.vi v9, v8, 2
-; ZVZIP-NEXT: vslideup.vi v8, v9, 1
-; ZVZIP-NEXT: vse64.v v8, (a1)
+; ZVZIP-NEXT: vunzipe.v v10, v8
+; ZVZIP-NEXT: vse64.v v10, (a1)
; ZVZIP-NEXT: ret
entry:
%0 = load <4 x double>, ptr %in, align 8
@@ -567,8 +564,7 @@ define void @vnsrl_64_double(ptr %in, ptr %out) {
; ZVZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; ZVZIP-NEXT: vle64.v v8, (a0)
; ZVZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; ZVZIP-NEXT: vslidedown.vi v9, v8, 2
-; ZVZIP-NEXT: vpairo.vv v10, v8, v9
+; ZVZIP-NEXT: vunzipo.v v10, v8
; ZVZIP-NEXT: vse64.v v10, (a1)
; ZVZIP-NEXT: ret
entry:
@@ -1698,13 +1694,13 @@ define <4 x i64> @unzip2a_dual_v4i64(<4 x i64> %a, <4 x i64> %b) {
;
; ZVZIP-LABEL: unzip2a_dual_v4i64:
; ZVZIP: # %bb.0: # %entry
-; ZVZIP-NEXT: vsetivli zero, 4, e64, m1, ta, mu
-; ZVZIP-NEXT: vmv.v.i v0, 8
-; ZVZIP-NEXT: vslideup.vi v10, v9, 2
-; ZVZIP-NEXT: vslideup.vi v10, v9, 1, v0.t
-; ZVZIP-NEXT: vmv.v.i v0, 12
-; ZVZIP-NEXT: vunzipe.v v11, v8
-; ZVZIP-NEXT: vmerge.vvm v8, v11, v10, v0
+; ZVZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZVZIP-NEXT: vmv1r.v v10, v9
+; ZVZIP-NEXT: vunzipe.v v12, v10
+; ZVZIP-NEXT: vunzipe.v v10, v8
+; ZVZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
+; ZVZIP-NEXT: vslideup.vi v10, v12, 2
+; ZVZIP-NEXT: vmv.v.v v8, v10
; ZVZIP-NEXT: ret
entry:
%c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -1959,16 +1955,13 @@ define <4 x i64> @unzip2a_dual_v4i64_exact_nf2(<4 x i64> %a, <4 x i64> %b) vscal
;
; ZVZIP-LABEL: unzip2a_dual_v4i64_exact_nf2:
; ZVZIP: # %bb.0: # %entry
-; ZVZIP-NEXT: vsetivli zero, 4, e64, m1, ta, mu
-; ZVZIP-NEXT: vmv.v.i v0, 8
-; ZVZIP-NEXT: vslideup.vi v12, v9, 2
-; ZVZIP-NEXT: vslideup.vi v12, v9, 1, v0.t
; ZVZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; ZVZIP-NEXT: vslidedown.vi v9, v8, 2
-; ZVZIP-NEXT: vmv.v.i v0, 12
-; ZVZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
-; ZVZIP-NEXT: vzip.vv v10, v8, v9
-; ZVZIP-NEXT: vmerge.vvm v8, v10, v12, v0
+; ZVZIP-NEXT: vmv1r.v v10, v9
+; ZVZIP-NEXT: vunzipe.v v12, v10
+; ZVZIP-NEXT: vunzipe.v v10, v8
+; ZVZIP-NEXT: vsetivli zero, 4, e64, m1, tu, ma
+; ZVZIP-NEXT: vslideup.vi v10, v12, 2
+; ZVZIP-NEXT: vmv1r.v v8, v10
; ZVZIP-NEXT: ret
entry:
%c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -2160,12 +2153,11 @@ define <4 x i64> @unzip2b_dual_v4i64(<4 x i64> %a, <4 x i64> %b) {
; ZVZIP: # %bb.0: # %entry
; ZVZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; ZVZIP-NEXT: vmv1r.v v10, v9
-; ZVZIP-NEXT: vslidedown.vi v11, v9, 2
+; ZVZIP-NEXT: vunzipo.v v12, v10
+; ZVZIP-NEXT: vunzipo.v v10, v8
; ZVZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
-; ZVZIP-NEXT: vunzipo.v v12, v8
-; ZVZIP-NEXT: vmv.v.i v0, 12
-; ZVZIP-NEXT: vzip.vv v8, v10, v11
-; ZVZIP-NEXT: vmerge.vvm v8, v12, v8, v0
+; ZVZIP-NEXT: vslideup.vi v10, v12, 2
+; ZVZIP-NEXT: vmv.v.v v8, v10
; ZVZIP-NEXT: ret
entry:
%c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
>From 80292f90c17507b5a8aeb71a8f96941c33ffb94f Mon Sep 17 00:00:00 2001
From: wangboyao <wangboyao at bytedance.com>
Date: Fri, 3 Apr 2026 15:07:45 +0800
Subject: [PATCH 3/4] format cleanup
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 19d7d3e9b1fc3..4a99acba7a024 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -6289,7 +6289,7 @@ static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
}
SDValue RISCVTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG) const {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
SDLoc DL(Op);
@@ -6543,9 +6543,10 @@ SDValue RISCVTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
const unsigned MinVLMAX = Subtarget.getRealMinVLen() / EltSize;
if (NumElts < MinVLMAX) {
MVT ConcatVT = VT.getDoubleNumVectorElementsVT();
- SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
- SDValue Res =
- lowerVZIP(Opc, Concat, DAG.getUNDEF(ConcatVT), DL, DAG, Subtarget);
+ SDValue Concat =
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
+ SDValue Res = lowerVZIP(Opc, Concat, DAG.getUNDEF(ConcatVT), DL, DAG,
+ Subtarget);
return DAG.getExtractSubvector(DL, VT, Res, 0);
}
>From 7625f1c5d7d028b633967a83c0d3916d7496fc1d Mon Sep 17 00:00:00 2001
From: wangboyao <wangboyao at bytedance.com>
Date: Fri, 3 Apr 2026 15:58:00 +0800
Subject: [PATCH 4/4] simplify comments
---
llvm/lib/Target/RISCV/RISCVInstrInfoZvzip.td | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvzip.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvzip.td
index a3a524d67142a..b22d6b2894236 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvzip.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvzip.td
@@ -30,8 +30,7 @@ let Predicates = [HasStdExtZvzip], Constraints = "@earlyclobber $vd" in {
}
} // Predicates = [HasStdExtZvzip]
-// Returns a vector. Operand 0 and 1 are vectors, operand 2 is a passthru,
-// operand 3 is a mask, operand 4 is VL.
+// (vd (op vs2, vs1, passthru, mask, vl))
def SDT_RISCVZip_VL : SDTypeProfile<1, 5, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisInt<0>, SDTCisInt<1>,
SDTCisSubVecOfVec<1, 0>,
@@ -41,8 +40,7 @@ def SDT_RISCVZip_VL : SDTypeProfile<1, 5, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisSameNumEltsAs<1, 4>,
SDTCisVT<5, XLenVT>]>;
-// Returns a vector. Operand 0 is a vector, operand 1 is a passthru,
-// operand 2 is a mask, operand 3 is VL.
+// (vd (op vs2, passthru, mask, vl))
def SDT_RISCVUnzip_VL : SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisInt<0>, SDTCisInt<1>,
SDTCisSubVecOfVec<0, 1>,