[llvm] 981f7d5 - [AArch64] Implement extract_subvector for predicates.

Sander de Smalen via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 8 00:19:06 PDT 2021


Author: Sander de Smalen
Date: 2021-09-08T08:18:34+01:00
New Revision: 981f7d563aa8ab3ed693d55c055b193adff4bba5

URL: https://github.com/llvm/llvm-project/commit/981f7d563aa8ab3ed693d55c055b193adff4bba5
DIFF: https://github.com/llvm/llvm-project/commit/981f7d563aa8ab3ed693d55c055b193adff4bba5.diff

LOG: [AArch64] Implement extract_subvector for predicates.

This patch implements extract_subvector for predicate types when
the input type is more than twice the size of the subvector that
is being extracted.

Reviewed By: CarolineConcatto

Differential Revision: https://reviews.llvm.org/D109314

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
    llvm/test/CodeGen/AArch64/sve-extract-vector.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 3a6f0841f2003..4c342d1a58bc4 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1234,6 +1234,41 @@ let Predicates = [HasSVEorStreamingSVE] in {
   def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))),
             (PUNPKHI_PP PPR:$Ps)>;
 
+  def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 0))),
+            (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 2))),
+            (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 4))),
+            (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 6))),
+            (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))>;
+
+  def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))),
+            (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))>;
+  def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 4))),
+            (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))>;
+  def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))),
+            (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))>;
+  def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 12))),
+            (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))>;
+
+  def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))),
+            (PUNPKLO_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps)))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 2))),
+            (PUNPKHI_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps)))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 4))),
+            (PUNPKLO_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps)))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 6))),
+            (PUNPKHI_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps)))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))),
+            (PUNPKLO_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps)))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 10))),
+            (PUNPKHI_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps)))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 12))),
+            (PUNPKLO_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps)))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 14))),
+            (PUNPKHI_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps)))>;
+
   // Extract subvectors from FP SVE vectors
   def : Pat<(nxv2f16 (extract_subvector (nxv4f16 ZPR:$Zs), (i64 0))),
             (UUNPKLO_ZZ_D ZPR:$Zs)>;

diff  --git a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
index cb6908db14ad1..c9e1486ba6353 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
@@ -198,6 +198,179 @@ define <4 x i64> @extract_fixed_v4i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind
   ret <4 x i64> %retval
 }
 
+;
+; Extracting a predicate from a wider predicate, that is more than twice the size.
+;
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_0(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 0)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_2(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 2)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_4(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 4)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_6(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_6:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 6)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_8(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 8)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_10(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_10:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 10)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_12(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_12:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 12)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_14(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_14:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 14)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv8i1_0(<vscale x 8 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv8i1_0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> %vec, i64 0)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv8i1_2(<vscale x 8 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv8i1_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> %vec, i64 2)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv8i1_4(<vscale x 8 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv8i1_4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> %vec, i64 4)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv8i1_6(<vscale x 8 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv8i1_6:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> %vec, i64 6)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 4 x i1> @extract_nxv4i1_nxv16i1_0(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv4i1_nxv16i1_0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x i1> @llvm.experimental.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %vec, i64 0)
+  ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 4 x i1> @extract_nxv4i1_nxv16i1_4(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv4i1_nxv16i1_4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x i1> @llvm.experimental.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %vec, i64 4)
+  ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 4 x i1> @extract_nxv4i1_nxv16i1_8(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv4i1_nxv16i1_8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x i1> @llvm.experimental.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %vec, i64 8)
+  ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 4 x i1> @extract_nxv4i1_nxv16i1_12(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv4i1_nxv16i1_12:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x i1> @llvm.experimental.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %vec, i64 12)
+  ret <vscale x 4 x i1> %res
+}
+
+
 attributes #0 = { vscale_range(2,2) }
 
 declare <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64>, i64)
@@ -209,3 +382,7 @@ declare <4 x i64> @llvm.experimental.vector.extract.v4i64.nxv2i64(<vscale x 2 x
 
 declare <vscale x 1 x i32> @llvm.experimental.vector.extract.nxv1i32.nxv4i32(<vscale x 4 x i32>, i64)
 declare <vscale x 1 x i16> @llvm.experimental.vector.extract.nxv1i16.nxv6i16(<vscale x 6 x i16>, i64)
+
+declare <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1>, i64)
+declare <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1>, i64)
+declare <vscale x 4 x i1> @llvm.experimental.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1>, i64)


        


More information about the llvm-commits mailing list