[llvm] 88bbd30 - [SVE][CodeGen] Fix issues with EXTRACT_SUBVECTOR when using scalable FP vectors

Wed Aug 12 00:36:16 PDT 2020

Author: David Sherwood
Date: 2020-08-12T08:35:46+01:00
New Revision: 88bbd30736561190a6733d0ad60aec21446b914c

URL: https://github.com/llvm/llvm-project/commit/88bbd30736561190a6733d0ad60aec21446b914c
DIFF: https://github.com/llvm/llvm-project/commit/88bbd30736561190a6733d0ad60aec21446b914c.diff

LOG: [SVE][CodeGen] Fix issues with EXTRACT_SUBVECTOR when using scalable FP vectors

In this patch I have fixed two issues:

1. Our SVE tuple get/set intrinsics were using the wrong constant type
for the index passed to EXTRACT_SUBVECTOR. I have fixed this by using the
function SelectionDAG::getVectorIdxConstant to create the value. Also, I
have updated the documentation for EXTRACT_SUBVECTOR describing what type
the constant index should be and we now enforce this when creating the
node.
2. The AArch64 backend was missing the appropriate patterns for
extracting certain subvectors (nxv4f16 and nxv2f32) from legal SVE types.
I have added them as part of this patch.

The only way that I could find to test the new patterns was to use the
SVE tuple get intrinsics, although I realise it looks a bit unusual.
Tests added here:

  test/CodeGen/AArch64/sve-extract-subvector.ll

Differential Revision: https://reviews.llvm.org/D85516

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/ISDOpcodes.h
    llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
    llvm/test/CodeGen/AArch64/sve-extract-subvector.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 59565128c847..ae08d6e9313d 100644

--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -514,7 +514,8 @@ enum NodeType {
   /// IDX is first scaled by the runtime scaling factor of T. Elements IDX
   /// through (IDX + num_elements(T) - 1) must be valid VECTOR indices. If this
   /// condition cannot be determined statically but is false at runtime, then
-  /// the result vector is undefined.
+  /// the result vector is undefined. The IDX parameter must be a vector index
+  /// constant type, which for most targets will be an integer pointer type.
   ///
   /// This operation supports extracting a fixed-width vector from a scalable
   /// vector, but not the other way around.

diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 26c75ab2bb33..1aefe8cc5b3a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5560,6 +5560,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
             (VT.getVectorMinNumElements() + N2C->getZExtValue()) <=
                 N1VT.getVectorMinNumElements()) &&
            "Extract subvector overflow!");
+    assert(N2C->getAPIntValue().getBitWidth() ==
+               TLI->getVectorIdxTy(getDataLayout())
+                   .getSizeInBits()
+                   .getFixedSize() &&
+           "Constant index for EXTRACT_SUBVECTOR has an invalid size");
 
     // Trivial extraction.
     if (VT == N1VT)

diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index db3d3591a9ee..d5720b9ea383 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14235,9 +14235,9 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
       uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
       EVT ResVT = N->getValueType(0);
       uint64_t NumLanes = ResVT.getVectorElementCount().Min;
+      SDValue ExtIdx = DAG.getVectorIdxConstant(IdxConst * NumLanes, DL);
       SDValue Val =
-          DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1,
-                      DAG.getConstant(IdxConst * NumLanes, DL, MVT::i32));
+          DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1, ExtIdx);
       return DAG.getMergeValues({Val, Chain}, DL);
     }
     case Intrinsic::aarch64_sve_tuple_set: {
@@ -14263,9 +14263,9 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
         if (I == IdxConst)
           Opnds.push_back(Vec);
         else {
-          Opnds.push_back(
-              DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, Vec.getValueType(), Tuple,
-                          DAG.getConstant(I * NumLanes, DL, MVT::i32)));
+          SDValue ExtIdx = DAG.getVectorIdxConstant(I * NumLanes, DL);
+          Opnds.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
+                                      Vec.getValueType(), Tuple, ExtIdx));
         }
       }
       SDValue Concat =

diff  --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 1a6653cee88f..f3a67d437c6b 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1155,6 +1155,16 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
   def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))),
             (ZIP2_PPP_B PPR:$Ps, (PFALSE))>;
 
+  // Extract subvectors from FP SVE vectors
+  def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 0))),
+            (UUNPKLO_ZZ_S ZPR:$Zs)>;
+  def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 4))),
+            (UUNPKHI_ZZ_S ZPR:$Zs)>;
+  def : Pat<(nxv2f32 (extract_subvector (nxv4f32 ZPR:$Zs), (i64 0))),
+            (UUNPKLO_ZZ_D ZPR:$Zs)>;
+  def : Pat<(nxv2f32 (extract_subvector (nxv4f32 ZPR:$Zs), (i64 2))),
+            (UUNPKHI_ZZ_D ZPR:$Zs)>;
+
   // Concatenate two predicates.
   def : Pat<(nxv4i1 (concat_vectors nxv2i1:$p1, nxv2i1:$p2)),
             (UZP1_PPP_S $p1, $p2)>;

diff  --git a/llvm/test/CodeGen/AArch64/sve-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-extract-subvector.ll
index 40c147c31ff8..5b0bdfc37b1a 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-subvector.ll
@@ -28,5 +28,43 @@ define <vscale x 2 x i64> @extract_nxv2i64_nxv32i8(<vscale x 32 x i8> %z0_z1) {
   ret <vscale x 2 x i64> %ext
 }
 
+define <vscale x 4 x half> @extract_lo_nxv4f16_nxv8f16(<vscale x 8 x half> %z0) {
+; CHECK-LABEL: extract_lo_nxv4f16_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ret
+  %ext = call <vscale x 4 x half> @llvm.aarch64.sve.tuple.get.nxv8f16(<vscale x 8 x half> %z0, i32 0)
+  ret <vscale x 4 x half> %ext
+}
+
+define <vscale x 4 x half> @extract_hi_nxv4f16_nxv8f16(<vscale x 8 x half> %z0) {
+; CHECK-LABEL: extract_hi_nxv4f16_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
+; CHECK-NEXT:    ret
+  %ext = call <vscale x 4 x half> @llvm.aarch64.sve.tuple.get.nxv8f16(<vscale x 8 x half> %z0, i32 1)
+  ret <vscale x 4 x half> %ext
+}
+
+define <vscale x 2 x float> @extract_lo_nxv2f32_nxv4f32(<vscale x 4 x float> %z0) {
+; CHECK-LABEL: extract_lo_nxv2f32_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ret
+  %ext = call <vscale x 2 x float> @llvm.aarch64.sve.tuple.get.nxv4f32(<vscale x 4 x float> %z0, i32 0)
+  ret <vscale x 2 x float> %ext
+}
+
+define <vscale x 2 x float> @extract_hi_nxv2f32_nxv4f32(<vscale x 4 x float> %z0) {
+; CHECK-LABEL: extract_hi_nxv2f32_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z0.d, z0.s
+; CHECK-NEXT:    ret
+  %ext = call <vscale x 2 x float> @llvm.aarch64.sve.tuple.get.nxv4f32(<vscale x 4 x float> %z0, i32 1)
+  ret <vscale x 2 x float> %ext
+}
+
 declare <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv4i64(<vscale x 4 x i64>, i32)
 declare <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv32i8(<vscale x 32 x i8>, i32)
+declare <vscale x 2 x float> @llvm.aarch64.sve.tuple.get.nxv4f32(<vscale x 4 x float>, i32)
+declare <vscale x 4 x half> @llvm.aarch64.sve.tuple.get.nxv8f16(<vscale x 8 x half>, i32)