[llvm] 9a2d4d1 - [SelectionDAG][AArch64] Legalize power of 2 vector.[de]interleaveN (#141513)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 3 04:05:47 PDT 2025
Author: Luke Lau
Date: 2025-06-03T12:05:44+01:00
New Revision: 9a2d4d176ad290eb556f27a990880acad99cc163
URL: https://github.com/llvm/llvm-project/commit/9a2d4d176ad290eb556f27a990880acad99cc163
DIFF: https://github.com/llvm/llvm-project/commit/9a2d4d176ad290eb556f27a990880acad99cc163.diff
LOG: [SelectionDAG][AArch64] Legalize power of 2 vector.[de]interleaveN (#141513)
After https://github.com/llvm/llvm-project/pull/139893, we now have
[de]interleave intrinsics for factors 2-8 inclusive, with the plan to
eventually get the loop vectorizer to emit a single intrinsic for these
factors instead of recursively deinterleaving (to support scalable
non-power-of-2 factors and to remove the complexity in the interleaved
access pass).
AArch64 currently supports scalable interleaved groups of factors 2 and
4 from the loop vectorizer. For factor 4 this is currently emitted as a
series of recursive [de]interleaves, and normally converted to a target
intrinsic in the interleaved access pass.
However, if for some reason the interleaved access pass doesn't catch it,
the [de]interleave4 intrinsic will need to be lowered by the backend.
This patch legalizes the factor-4 node, and any other power-of-2 factor,
by expanding it into smaller-factor nodes, so if a target can lower
[de]interleave2 it should be able to handle this without crashing.
Factor 3 will probably be more complicated to lower so I've left it out
for now. We can disable it in the AArch64 cost model when implementing
the loop vectorizer changes.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll
llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 2c41a871b6d6c..46a2370108278 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3472,6 +3472,59 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(TLI.expandVectorSplice(Node, DAG));
break;
}
+ case ISD::VECTOR_DEINTERLEAVE: {
+ unsigned Factor = Node->getNumOperands();
+ if (Factor <= 2 || !isPowerOf2_32(Factor))
+ break;
+ SmallVector<SDValue, 8> Ops;
+ for (SDValue Op : Node->ops())
+ Ops.push_back(Op);
+ EVT VecVT = Node->getValueType(0);
+ SmallVector<EVT> HalfVTs(Factor / 2, VecVT);
+ // Deinterleave at Factor/2 so each result contains two factors interleaved:
+ // a0b0 c0d0 a1b1 c1d1 -> [a0c0 b0d0] [a1c1 b1d1]
+ SDValue L = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, dl, HalfVTs,
+ ArrayRef(Ops).take_front(Factor / 2));
+ SDValue R = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, dl, HalfVTs,
+ ArrayRef(Ops).take_back(Factor / 2));
+ Results.resize(Factor);
+ // Deinterleave the 2 factors out:
+ // [a0c0 a1c1] [b0d0 b1d1] -> a0a1 b0b1 c0c1 d0d1
+ for (unsigned I = 0; I < Factor / 2; I++) {
+ SDValue Deinterleave =
+ DAG.getNode(ISD::VECTOR_DEINTERLEAVE, dl, {VecVT, VecVT},
+ {L.getValue(I), R.getValue(I)});
+ Results[I] = Deinterleave.getValue(0);
+ Results[I + Factor / 2] = Deinterleave.getValue(1);
+ }
+ break;
+ }
+ case ISD::VECTOR_INTERLEAVE: {
+ unsigned Factor = Node->getNumOperands();
+ if (Factor <= 2 || !isPowerOf2_32(Factor))
+ break;
+ EVT VecVT = Node->getValueType(0);
+ SmallVector<EVT> HalfVTs(Factor / 2, VecVT);
+ SmallVector<SDValue, 8> LOps, ROps;
+ // Interleave so we have 2 factors per result:
+ // a0a1 b0b1 c0c1 d0d1 -> [a0c0 b0d0] [a1c1 b1d1]
+ for (unsigned I = 0; I < Factor / 2; I++) {
+ SDValue Interleave =
+ DAG.getNode(ISD::VECTOR_INTERLEAVE, dl, {VecVT, VecVT},
+ {Node->getOperand(I), Node->getOperand(I + Factor / 2)});
+ LOps.push_back(Interleave.getValue(0));
+ ROps.push_back(Interleave.getValue(1));
+ }
+ // Interleave at Factor/2:
+ // [a0c0 b0d0] [a1c1 b1d1] -> a0b0 c0d0 a1b1 c1d1
+ SDValue L = DAG.getNode(ISD::VECTOR_INTERLEAVE, dl, HalfVTs, LOps);
+ SDValue R = DAG.getNode(ISD::VECTOR_INTERLEAVE, dl, HalfVTs, ROps);
+ for (unsigned I = 0; I < Factor / 2; I++)
+ Results.push_back(L.getValue(I));
+ for (unsigned I = 0; I < Factor / 2; I++)
+ Results.push_back(R.getValue(I));
+ break;
+ }
case ISD::EXTRACT_ELEMENT: {
EVT OpTy = Node->getOperand(0).getValueType();
if (Node->getConstantOperandVal(1)) {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4dd9c513120bb..e2c12bba6e284 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -29208,6 +29208,10 @@ AArch64TargetLowering::LowerVECTOR_DEINTERLEAVE(SDValue Op,
EVT OpVT = Op.getValueType();
assert(OpVT.isScalableVector() &&
"Expected scalable vector in LowerVECTOR_DEINTERLEAVE.");
+
+ if (Op->getNumOperands() != 2)
+ return SDValue();
+
SDValue Even = DAG.getNode(AArch64ISD::UZP1, DL, OpVT, Op.getOperand(0),
Op.getOperand(1));
SDValue Odd = DAG.getNode(AArch64ISD::UZP2, DL, OpVT, Op.getOperand(0),
@@ -29222,6 +29226,9 @@ SDValue AArch64TargetLowering::LowerVECTOR_INTERLEAVE(SDValue Op,
assert(OpVT.isScalableVector() &&
"Expected scalable vector in LowerVECTOR_INTERLEAVE.");
+ if (Op->getNumOperands() != 2)
+ return SDValue();
+
SDValue Lo = DAG.getNode(AArch64ISD::ZIP1, DL, OpVT, Op.getOperand(0),
Op.getOperand(1));
SDValue Hi = DAG.getNode(AArch64ISD::ZIP2, DL, OpVT, Op.getOperand(0),
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll b/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll
index adf1b48b6998a..89fc10b47bb35 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll
@@ -151,6 +151,102 @@ define {<vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv
ret {<vscale x 2 x i64>, <vscale x 2 x i64>} %retval
}
+define {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} @vector_deinterleave_nxv16i8_nxv64i8(<vscale x 64 x i8> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv16i8_nxv64i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzp1 z4.b, z2.b, z3.b
+; CHECK-NEXT: uzp1 z5.b, z0.b, z1.b
+; CHECK-NEXT: uzp2 z3.b, z2.b, z3.b
+; CHECK-NEXT: uzp2 z6.b, z0.b, z1.b
+; CHECK-NEXT: uzp1 z0.b, z5.b, z4.b
+; CHECK-NEXT: uzp2 z2.b, z5.b, z4.b
+; CHECK-NEXT: uzp1 z1.b, z6.b, z3.b
+; CHECK-NEXT: uzp2 z3.b, z6.b, z3.b
+; CHECK-NEXT: ret
+ %retval = call {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave4.nxv64i8(<vscale x 64 x i8> %vec)
+ ret {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} %retval
+}
+
+define {<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>} @vector_deinterleave_nxv8i16_nxv32i16(<vscale x 32 x i16> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv8i16_nxv32i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzp1 z4.h, z2.h, z3.h
+; CHECK-NEXT: uzp1 z5.h, z0.h, z1.h
+; CHECK-NEXT: uzp2 z3.h, z2.h, z3.h
+; CHECK-NEXT: uzp2 z6.h, z0.h, z1.h
+; CHECK-NEXT: uzp1 z0.h, z5.h, z4.h
+; CHECK-NEXT: uzp2 z2.h, z5.h, z4.h
+; CHECK-NEXT: uzp1 z1.h, z6.h, z3.h
+; CHECK-NEXT: uzp2 z3.h, z6.h, z3.h
+; CHECK-NEXT: ret
+ %retval = call {<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.vector.deinterleave4.nxv32i16(<vscale x 32 x i16> %vec)
+ ret {<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>} %retval
+}
+
+define {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} @vector_deinterleave_nxv4i32_nxv16i32(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv4i32_nxv16i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzp1 z4.s, z2.s, z3.s
+; CHECK-NEXT: uzp1 z5.s, z0.s, z1.s
+; CHECK-NEXT: uzp2 z3.s, z2.s, z3.s
+; CHECK-NEXT: uzp2 z6.s, z0.s, z1.s
+; CHECK-NEXT: uzp1 z0.s, z5.s, z4.s
+; CHECK-NEXT: uzp2 z2.s, z5.s, z4.s
+; CHECK-NEXT: uzp1 z1.s, z6.s, z3.s
+; CHECK-NEXT: uzp2 z3.s, z6.s, z3.s
+; CHECK-NEXT: ret
+ %retval = call {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.vector.deinterleave4.nxv16i32(<vscale x 16 x i32> %vec)
+ ret {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} %retval
+}
+
+define {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv8i64(<vscale x 8 x i64> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv2i64_nxv8i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzp1 z4.d, z2.d, z3.d
+; CHECK-NEXT: uzp1 z5.d, z0.d, z1.d
+; CHECK-NEXT: uzp2 z3.d, z2.d, z3.d
+; CHECK-NEXT: uzp2 z6.d, z0.d, z1.d
+; CHECK-NEXT: uzp1 z0.d, z5.d, z4.d
+; CHECK-NEXT: uzp2 z2.d, z5.d, z4.d
+; CHECK-NEXT: uzp1 z1.d, z6.d, z3.d
+; CHECK-NEXT: uzp2 z3.d, z6.d, z3.d
+; CHECK-NEXT: ret
+ %retval = call {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.vector.deinterleave4.nxv8i64(<vscale x 8 x i64> %vec)
+ ret {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} %retval
+}
+
+define {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv16i64(<vscale x 16 x i64> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv2i64_nxv16i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzp1 z24.d, z6.d, z7.d
+; CHECK-NEXT: uzp1 z25.d, z4.d, z5.d
+; CHECK-NEXT: uzp1 z26.d, z2.d, z3.d
+; CHECK-NEXT: uzp1 z27.d, z0.d, z1.d
+; CHECK-NEXT: uzp2 z6.d, z6.d, z7.d
+; CHECK-NEXT: uzp2 z4.d, z4.d, z5.d
+; CHECK-NEXT: uzp2 z2.d, z2.d, z3.d
+; CHECK-NEXT: uzp2 z0.d, z0.d, z1.d
+; CHECK-NEXT: uzp1 z5.d, z25.d, z24.d
+; CHECK-NEXT: uzp2 z24.d, z25.d, z24.d
+; CHECK-NEXT: uzp1 z7.d, z27.d, z26.d
+; CHECK-NEXT: uzp1 z28.d, z4.d, z6.d
+; CHECK-NEXT: uzp2 z25.d, z27.d, z26.d
+; CHECK-NEXT: uzp1 z29.d, z0.d, z2.d
+; CHECK-NEXT: uzp2 z26.d, z4.d, z6.d
+; CHECK-NEXT: uzp2 z27.d, z0.d, z2.d
+; CHECK-NEXT: uzp1 z0.d, z7.d, z5.d
+; CHECK-NEXT: uzp1 z2.d, z25.d, z24.d
+; CHECK-NEXT: uzp2 z4.d, z7.d, z5.d
+; CHECK-NEXT: uzp1 z1.d, z29.d, z28.d
+; CHECK-NEXT: uzp1 z3.d, z27.d, z26.d
+; CHECK-NEXT: uzp2 z5.d, z29.d, z28.d
+; CHECK-NEXT: uzp2 z6.d, z25.d, z24.d
+; CHECK-NEXT: uzp2 z7.d, z27.d, z26.d
+; CHECK-NEXT: ret
+ %retval = call {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.vector.deinterleave8.nxv16i64(<vscale x 16 x i64> %vec)
+ ret {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} %retval
+}
+
; Predicated
define {<vscale x 16 x i1>, <vscale x 16 x i1>} @vector_deinterleave_nxv16i1_nxv32i1(<vscale x 32 x i1> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv16i1_nxv32i1:
@@ -279,7 +375,6 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @vector_deinterleave_nxv2i32_nxv
ret {<vscale x 2 x i32>, <vscale x 2 x i32>} %retval
}
-
; Floating declarations
declare {<vscale x 2 x half>,<vscale x 2 x half>} @llvm.vector.deinterleave2.nxv4f16(<vscale x 4 x half>)
declare {<vscale x 4 x half>, <vscale x 4 x half>} @llvm.vector.deinterleave2.nxv8f16(<vscale x 8 x half>)
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll b/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
index 288034422d9c0..34d026f43708c 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
@@ -146,6 +146,102 @@ define <vscale x 4 x i64> @interleave2_nxv4i64(<vscale x 2 x i64> %vec0, <vscale
ret <vscale x 4 x i64> %retval
}
+define <vscale x 64 x i8> @interleave4_nxv16i8(<vscale x 16 x i8> %vec0, <vscale x 16 x i8> %vec1, <vscale x 16 x i8> %vec2, <vscale x 16 x i8> %vec3) {
+; CHECK-LABEL: interleave4_nxv16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zip1 z4.b, z1.b, z3.b
+; CHECK-NEXT: zip1 z5.b, z0.b, z2.b
+; CHECK-NEXT: zip2 z3.b, z1.b, z3.b
+; CHECK-NEXT: zip2 z6.b, z0.b, z2.b
+; CHECK-NEXT: zip1 z0.b, z5.b, z4.b
+; CHECK-NEXT: zip2 z1.b, z5.b, z4.b
+; CHECK-NEXT: zip1 z2.b, z6.b, z3.b
+; CHECK-NEXT: zip2 z3.b, z6.b, z3.b
+; CHECK-NEXT: ret
+ %retval = call <vscale x 64 x i8> @llvm.vector.interleave4.nxv16i8(<vscale x 16 x i8> %vec0, <vscale x 16 x i8> %vec1, <vscale x 16 x i8> %vec2, <vscale x 16 x i8> %vec3)
+ ret <vscale x 64 x i8> %retval
+}
+
+define <vscale x 32 x i16> @interleave4_nxv8i16(<vscale x 8 x i16> %vec0, <vscale x 8 x i16> %vec1, <vscale x 8 x i16> %vec2, <vscale x 8 x i16> %vec3) {
+; CHECK-LABEL: interleave4_nxv8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zip1 z4.h, z1.h, z3.h
+; CHECK-NEXT: zip1 z5.h, z0.h, z2.h
+; CHECK-NEXT: zip2 z3.h, z1.h, z3.h
+; CHECK-NEXT: zip2 z6.h, z0.h, z2.h
+; CHECK-NEXT: zip1 z0.h, z5.h, z4.h
+; CHECK-NEXT: zip2 z1.h, z5.h, z4.h
+; CHECK-NEXT: zip1 z2.h, z6.h, z3.h
+; CHECK-NEXT: zip2 z3.h, z6.h, z3.h
+; CHECK-NEXT: ret
+ %retval = call <vscale x 32 x i16> @llvm.vector.interleave4.nxv8i16(<vscale x 8 x i16> %vec0, <vscale x 8 x i16> %vec1, <vscale x 8 x i16> %vec2, <vscale x 8 x i16> %vec3)
+ ret <vscale x 32 x i16> %retval
+}
+
+define <vscale x 16 x i32> @interleave4_nxv4i32(<vscale x 4 x i32> %vec0, <vscale x 4 x i32> %vec1, <vscale x 4 x i32> %vec2, <vscale x 4 x i32> %vec3) {
+; CHECK-LABEL: interleave4_nxv4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zip1 z4.s, z1.s, z3.s
+; CHECK-NEXT: zip1 z5.s, z0.s, z2.s
+; CHECK-NEXT: zip2 z3.s, z1.s, z3.s
+; CHECK-NEXT: zip2 z6.s, z0.s, z2.s
+; CHECK-NEXT: zip1 z0.s, z5.s, z4.s
+; CHECK-NEXT: zip2 z1.s, z5.s, z4.s
+; CHECK-NEXT: zip1 z2.s, z6.s, z3.s
+; CHECK-NEXT: zip2 z3.s, z6.s, z3.s
+; CHECK-NEXT: ret
+ %retval = call <vscale x 16 x i32> @llvm.vector.interleave4.nxv4i32(<vscale x 4 x i32> %vec0, <vscale x 4 x i32> %vec1, <vscale x 4 x i32> %vec2, <vscale x 4 x i32> %vec3)
+ ret <vscale x 16 x i32> %retval
+}
+
+define <vscale x 8 x i64> @interleave4_nxv8i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1, <vscale x 2 x i64> %vec2, <vscale x 2 x i64> %vec3) {
+; CHECK-LABEL: interleave4_nxv8i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zip1 z4.d, z1.d, z3.d
+; CHECK-NEXT: zip1 z5.d, z0.d, z2.d
+; CHECK-NEXT: zip2 z3.d, z1.d, z3.d
+; CHECK-NEXT: zip2 z6.d, z0.d, z2.d
+; CHECK-NEXT: zip1 z0.d, z5.d, z4.d
+; CHECK-NEXT: zip2 z1.d, z5.d, z4.d
+; CHECK-NEXT: zip1 z2.d, z6.d, z3.d
+; CHECK-NEXT: zip2 z3.d, z6.d, z3.d
+; CHECK-NEXT: ret
+ %retval = call <vscale x 8 x i64> @llvm.vector.interleave4.nxv8i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1, <vscale x 2 x i64> %vec2, <vscale x 2 x i64> %vec3)
+ ret <vscale x 8 x i64> %retval
+}
+
+define <vscale x 16 x i64> @interleave8_nxv16i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1, <vscale x 2 x i64> %vec2, <vscale x 2 x i64> %vec3, <vscale x 2 x i64> %vec4, <vscale x 2 x i64> %vec5, <vscale x 2 x i64> %vec6, <vscale x 2 x i64> %vec7) {
+; CHECK-LABEL: interleave8_nxv16i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zip1 z24.d, z3.d, z7.d
+; CHECK-NEXT: zip1 z25.d, z1.d, z5.d
+; CHECK-NEXT: zip1 z26.d, z2.d, z6.d
+; CHECK-NEXT: zip1 z27.d, z0.d, z4.d
+; CHECK-NEXT: zip2 z3.d, z3.d, z7.d
+; CHECK-NEXT: zip2 z1.d, z1.d, z5.d
+; CHECK-NEXT: zip2 z2.d, z2.d, z6.d
+; CHECK-NEXT: zip2 z0.d, z0.d, z4.d
+; CHECK-NEXT: zip1 z4.d, z25.d, z24.d
+; CHECK-NEXT: zip2 z6.d, z25.d, z24.d
+; CHECK-NEXT: zip1 z5.d, z27.d, z26.d
+; CHECK-NEXT: zip2 z7.d, z27.d, z26.d
+; CHECK-NEXT: zip1 z24.d, z1.d, z3.d
+; CHECK-NEXT: zip1 z25.d, z0.d, z2.d
+; CHECK-NEXT: zip2 z26.d, z1.d, z3.d
+; CHECK-NEXT: zip2 z27.d, z0.d, z2.d
+; CHECK-NEXT: zip1 z0.d, z5.d, z4.d
+; CHECK-NEXT: zip2 z1.d, z5.d, z4.d
+; CHECK-NEXT: zip1 z2.d, z7.d, z6.d
+; CHECK-NEXT: zip2 z3.d, z7.d, z6.d
+; CHECK-NEXT: zip1 z4.d, z25.d, z24.d
+; CHECK-NEXT: zip2 z5.d, z25.d, z24.d
+; CHECK-NEXT: zip1 z6.d, z27.d, z26.d
+; CHECK-NEXT: zip2 z7.d, z27.d, z26.d
+; CHECK-NEXT: ret
+ %retval = call <vscale x 16 x i64> @llvm.vector.interleave8.nxv16i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1, <vscale x 2 x i64> %vec2, <vscale x 2 x i64> %vec3, <vscale x 2 x i64> %vec4, <vscale x 2 x i64> %vec5, <vscale x 2 x i64> %vec6, <vscale x 2 x i64> %vec7)
+ ret <vscale x 16 x i64> %retval
+}
+
; Predicated
define <vscale x 32 x i1> @interleave2_nxv32i1(<vscale x 16 x i1> %vec0, <vscale x 16 x i1> %vec1) {
More information about the llvm-commits
mailing list