[llvm] 981f7d5 - [AArch64] Implement extract_subvector for predicates.
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 8 00:19:06 PDT 2021
Author: Sander de Smalen
Date: 2021-09-08T08:18:34+01:00
New Revision: 981f7d563aa8ab3ed693d55c055b193adff4bba5
URL: https://github.com/llvm/llvm-project/commit/981f7d563aa8ab3ed693d55c055b193adff4bba5
DIFF: https://github.com/llvm/llvm-project/commit/981f7d563aa8ab3ed693d55c055b193adff4bba5.diff
LOG: [AArch64] Implement extract_subvector for predicates.
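This patch implements extract_subvector for predicate types when
the input type is more than twice the size of the subvector that
is being extracted (nxv8i1 -> nxv2i1, nxv16i1 -> nxv4i1 and
nxv16i1 -> nxv2i1), by chaining PUNPKLO/PUNPKHI instructions.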
Reviewed By: CarolineConcatto
Differential Revision: https://reviews.llvm.org/D109314
Added:
Modified:
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/test/CodeGen/AArch64/sve-extract-vector.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 3a6f0841f2003..4c342d1a58bc4 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1234,6 +1234,41 @@ let Predicates = [HasSVEorStreamingSVE] in {
def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))),
(PUNPKHI_PP PPR:$Ps)>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 0))),
+ (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 2))),
+ (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 4))),
+ (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 6))),
+ (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))>;
+
+ def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))),
+ (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))>;
+ def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 4))),
+ (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))>;
+ def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))),
+ (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))>;
+ def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 12))),
+ (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))>;
+
+ def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))),
+ (PUNPKLO_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps)))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 2))),
+ (PUNPKHI_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps)))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 4))),
+ (PUNPKLO_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps)))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 6))),
+ (PUNPKHI_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps)))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))),
+ (PUNPKLO_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps)))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 10))),
+ (PUNPKHI_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps)))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 12))),
+ (PUNPKLO_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps)))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 14))),
+ (PUNPKHI_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps)))>;
+
// Extract subvectors from FP SVE vectors
def : Pat<(nxv2f16 (extract_subvector (nxv4f16 ZPR:$Zs), (i64 0))),
(UUNPKLO_ZZ_D ZPR:$Zs)>;
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
index cb6908db14ad1..c9e1486ba6353 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
@@ -198,6 +198,179 @@ define <4 x i64> @extract_fixed_v4i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind
ret <4 x i64> %retval
}
+;
+; Extracting a predicate from a wider predicate, that is more than twice the size.
+;
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_0(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_0:
+; CHECK: // %bb.0:
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 0)
+ ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_2(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 2)
+ ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_4(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 4)
+ ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_6(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_6:
+; CHECK: // %bb.0:
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 6)
+ ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_8(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 8)
+ ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_10(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_10:
+; CHECK: // %bb.0:
+; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 10)
+ ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_12(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_12:
+; CHECK: // %bb.0:
+; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 12)
+ ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_14(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_14:
+; CHECK: // %bb.0:
+; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 14)
+ ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv8i1_0(<vscale x 8 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv8i1_0:
+; CHECK: // %bb.0:
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> %vec, i64 0)
+ ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv8i1_2(<vscale x 8 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv8i1_2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> %vec, i64 2)
+ ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv8i1_4(<vscale x 8 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv8i1_4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> %vec, i64 4)
+ ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv8i1_6(<vscale x 8 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv8i1_6:
+; CHECK: // %bb.0:
+; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> %vec, i64 6)
+ ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 4 x i1> @extract_nxv4i1_nxv16i1_0(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv4i1_nxv16i1_0:
+; CHECK: // %bb.0:
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x i1> @llvm.experimental.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %vec, i64 0)
+ ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 4 x i1> @extract_nxv4i1_nxv16i1_4(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv4i1_nxv16i1_4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x i1> @llvm.experimental.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %vec, i64 4)
+ ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 4 x i1> @extract_nxv4i1_nxv16i1_8(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv4i1_nxv16i1_8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x i1> @llvm.experimental.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %vec, i64 8)
+ ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 4 x i1> @extract_nxv4i1_nxv16i1_12(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv4i1_nxv16i1_12:
+; CHECK: // %bb.0:
+; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x i1> @llvm.experimental.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %vec, i64 12)
+ ret <vscale x 4 x i1> %res
+}
+
+
attributes #0 = { vscale_range(2,2) }
declare <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64>, i64)
@@ -209,3 +382,7 @@ declare <4 x i64> @llvm.experimental.vector.extract.v4i64.nxv2i64(<vscale x 2 x
declare <vscale x 1 x i32> @llvm.experimental.vector.extract.nxv1i32.nxv4i32(<vscale x 4 x i32>, i64)
declare <vscale x 1 x i16> @llvm.experimental.vector.extract.nxv1i16.nxv6i16(<vscale x 6 x i16>, i64)
+
+declare <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1>, i64)
+declare <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1>, i64)
+declare <vscale x 4 x i1> @llvm.experimental.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1>, i64)
More information about the llvm-commits mailing list