[llvm] 801a745 - [SelectionDAG] PromoteIntRes_EXTRACT_SUBVECTOR for scalable vectors.

Fri Sep 10 05:30:32 PDT 2021

Author: Sander de Smalen
Date: 2021-09-10T13:29:26+01:00
New Revision: 801a745dd27d60ae70b984e66b02c172d261feb0

URL: https://github.com/llvm/llvm-project/commit/801a745dd27d60ae70b984e66b02c172d261feb0
DIFF: https://github.com/llvm/llvm-project/commit/801a745dd27d60ae70b984e66b02c172d261feb0.diff

LOG: [SelectionDAG] PromoteIntRes_EXTRACT_SUBVECTOR for scalable vectors.

This patch implements legalization of EXTRACT_SUBVECTOR for the case
where the result needs promoting, and the input type is either legal
or requires splitting.

The idea is that the operation is broken down into simpler steps:
a smaller subvector is extracted first, and this is repeated until
the input vector becomes legal or requires promotion.

Reviewed By: CarolineConcatto

Differential Revision: https://reviews.llvm.org/D109313

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
    llvm/test/CodeGen/AArch64/sve-extract-vector.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 91ff82588df14..cef50bc6b8431 100644

--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -4786,6 +4786,23 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
     SDValue InOp0 = N->getOperand(0);
     EVT InVT = InOp0.getValueType();
 
+    // Try and extract from a smaller type so that it eventually falls
+    // into the promotion code below.
+    if (getTypeAction(InVT) == TargetLowering::TypeSplitVector ||
+        getTypeAction(InVT) == TargetLowering::TypeLegal) {
+      EVT NInVT = InVT.getHalfNumVectorElementsVT(*DAG.getContext());
+      unsigned NElts = NInVT.getVectorMinNumElements();
+      uint64_t IdxVal = cast<ConstantSDNode>(BaseIdx)->getZExtValue();
+
+      SDValue Step1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NInVT, InOp0,
+                                  DAG.getConstant(alignDown(IdxVal, NElts), dl,
+                                                  BaseIdx.getValueType()));
+      SDValue Step2 = DAG.getNode(
+          ISD::EXTRACT_SUBVECTOR, dl, OutVT, Step1,
+          DAG.getConstant(IdxVal % NElts, dl, BaseIdx.getValueType()));
+      return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, Step2);
+    }
+
     // Promote operands and see if this is handled by target lowering,
     // Otherwise, use the BUILD_VECTOR approach below
     if (getTypeAction(InVT) == TargetLowering::TypePromoteInteger) {

diff  --git a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
index c9e1486ba6353..63d906eab065c 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
@@ -370,6 +370,185 @@ define <vscale x 4 x i1> @extract_nxv4i1_nxv16i1_12(<vscale x 16 x i1> %vec) {
   ret <vscale x 4 x i1> %res
 }
 
+;
+; Extracting illegal vector that needs promotion from a vector that needs splitting.
+;
+
+define <vscale x 2 x i8> @extract_nxv2i8_nxv32i8_0(<vscale x 32 x i8> %vec) {
+; CHECK-LABEL: extract_nxv2i8_nxv32i8_0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv32i8(<vscale x 32 x i8> %vec, i64 0)
+  ret <vscale x 2 x i8> %res
+}
+
+define <vscale x 2 x i8> @extract_nxv2i8_nxv32i8_2(<vscale x 32 x i8> %vec) {
+; CHECK-LABEL: extract_nxv2i8_nxv32i8_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uunpkhi z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv32i8(<vscale x 32 x i8> %vec, i64 2)
+  ret <vscale x 2 x i8> %res
+}
+
+define <vscale x 2 x i8> @extract_nxv2i8_nxv32i8_4(<vscale x 32 x i8> %vec) {
+; CHECK-LABEL: extract_nxv2i8_nxv32i8_4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv32i8(<vscale x 32 x i8> %vec, i64 4)
+  ret <vscale x 2 x i8> %res
+}
+
+define <vscale x 2 x i8> @extract_nxv2i8_nxv32i8_6(<vscale x 32 x i8> %vec) {
+; CHECK-LABEL: extract_nxv2i8_nxv32i8_6:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
+; CHECK-NEXT:    uunpkhi z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv32i8(<vscale x 32 x i8> %vec, i64 6)
+  ret <vscale x 2 x i8> %res
+}
+
+define <vscale x 2 x i8> @extract_nxv2i8_nxv32i8_8(<vscale x 32 x i8> %vec) {
+; CHECK-LABEL: extract_nxv2i8_nxv32i8_8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z0.h, z0.b
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv32i8(<vscale x 32 x i8> %vec, i64 8)
+  ret <vscale x 2 x i8> %res
+}
+
+define <vscale x 2 x i8> @extract_nxv2i8_nxv32i8_10(<vscale x 32 x i8> %vec) {
+; CHECK-LABEL: extract_nxv2i8_nxv32i8_10:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z0.h, z0.b
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uunpkhi z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv32i8(<vscale x 32 x i8> %vec, i64 10)
+  ret <vscale x 2 x i8> %res
+}
+
+define <vscale x 2 x i8> @extract_nxv2i8_nxv32i8_12(<vscale x 32 x i8> %vec) {
+; CHECK-LABEL: extract_nxv2i8_nxv32i8_12:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z0.h, z0.b
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv32i8(<vscale x 32 x i8> %vec, i64 12)
+  ret <vscale x 2 x i8> %res
+}
+
+define <vscale x 2 x i8> @extract_nxv2i8_nxv32i8_14(<vscale x 32 x i8> %vec) {
+; CHECK-LABEL: extract_nxv2i8_nxv32i8_14:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z0.h, z0.b
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
+; CHECK-NEXT:    uunpkhi z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv32i8(<vscale x 32 x i8> %vec, i64 14)
+  ret <vscale x 2 x i8> %res
+}
+
+define <vscale x 2 x i8> @extract_nxv2i8_nxv32i8_16(<vscale x 32 x i8> %vec) {
+; CHECK-LABEL: extract_nxv2i8_nxv32i8_16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.h, z1.b
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv32i8(<vscale x 32 x i8> %vec, i64 16)
+  ret <vscale x 2 x i8> %res
+}
+
+define <vscale x 2 x i8> @extract_nxv2i8_nxv32i8_18(<vscale x 32 x i8> %vec) {
+; CHECK-LABEL: extract_nxv2i8_nxv32i8_18:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.h, z1.b
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uunpkhi z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv32i8(<vscale x 32 x i8> %vec, i64 18)
+  ret <vscale x 2 x i8> %res
+}
+
+define <vscale x 2 x i8> @extract_nxv2i8_nxv32i8_20(<vscale x 32 x i8> %vec) {
+; CHECK-LABEL: extract_nxv2i8_nxv32i8_20:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.h, z1.b
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv32i8(<vscale x 32 x i8> %vec, i64 20)
+  ret <vscale x 2 x i8> %res
+}
+
+define <vscale x 2 x i8> @extract_nxv2i8_nxv32i8_22(<vscale x 32 x i8> %vec) {
+; CHECK-LABEL: extract_nxv2i8_nxv32i8_22:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.h, z1.b
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
+; CHECK-NEXT:    uunpkhi z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv32i8(<vscale x 32 x i8> %vec, i64 22)
+  ret <vscale x 2 x i8> %res
+}
+
+define <vscale x 2 x i8> @extract_nxv2i8_nxv32i8_24(<vscale x 32 x i8> %vec) {
+; CHECK-LABEL: extract_nxv2i8_nxv32i8_24:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z0.h, z1.b
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv32i8(<vscale x 32 x i8> %vec, i64 24)
+  ret <vscale x 2 x i8> %res
+}
+
+define <vscale x 2 x i8> @extract_nxv2i8_nxv32i8_26(<vscale x 32 x i8> %vec) {
+; CHECK-LABEL: extract_nxv2i8_nxv32i8_26:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z0.h, z1.b
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uunpkhi z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv32i8(<vscale x 32 x i8> %vec, i64 26)
+  ret <vscale x 2 x i8> %res
+}
+
+define <vscale x 2 x i8> @extract_nxv2i8_nxv32i8_28(<vscale x 32 x i8> %vec) {
+; CHECK-LABEL: extract_nxv2i8_nxv32i8_28:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z0.h, z1.b
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv32i8(<vscale x 32 x i8> %vec, i64 28)
+  ret <vscale x 2 x i8> %res
+}
+
+define <vscale x 2 x i8> @extract_nxv2i8_nxv32i8_30(<vscale x 32 x i8> %vec) {
+; CHECK-LABEL: extract_nxv2i8_nxv32i8_30:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z0.h, z1.b
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
+; CHECK-NEXT:    uunpkhi z0.d, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv32i8(<vscale x 32 x i8> %vec, i64 30)
+  ret <vscale x 2 x i8> %res
+}
 
 attributes #0 = { vscale_range(2,2) }
 
@@ -386,3 +565,5 @@ declare <vscale x 1 x i16> @llvm.experimental.vector.extract.nxv1i16.nxv6i16(<vs
 declare <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1>, i64)
 declare <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1>, i64)
 declare <vscale x 4 x i1> @llvm.experimental.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1>, i64)
+
+declare <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv32i8(<vscale x 32 x i8> , i64)