[llvm] ae09670 - [CodeGen][SVE] CopyToReg: Split scalable EVTs that are not powers of 2
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 8 02:40:56 PDT 2020
Author: Sander de Smalen
Date: 2020-06-08T10:39:18+01:00
New Revision: ae09670ee4de461669fc9f09b146ba0a5f0935fb
URL: https://github.com/llvm/llvm-project/commit/ae09670ee4de461669fc9f09b146ba0a5f0935fb
DIFF: https://github.com/llvm/llvm-project/commit/ae09670ee4de461669fc9f09b146ba0a5f0935fb.diff
LOG: [CodeGen][SVE] CopyToReg: Split scalable EVTs that are not powers of 2
Scalable vectors cannot use 'BUILD_VECTOR', so they must be properly
split and widened when passed to CopyToReg/CopyFromReg.
This functionality is added to TargetLoweringBase::getVectorTypeBreakdown().
This patch only adds support for 'splitting' scalable vectors that
are a multiple of some legal type, e.g.
<vscale x 6 x i64> -> 3 x <vscale x 2 x i64>
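As a rough sketch of the intended query (illustrative only; Ctx and TLI
are assumed to be an LLVMContext and the target's TargetLowering, and the
two-argument ElementCount constructor is the one in tree as of this commit):

  // Ask how <vscale x 6 x i64> should be broken down after this patch.
  EVT VT = EVT::getVectorVT(Ctx, MVT::i64, ElementCount(6, /*Scalable=*/true));
  EVT IntermediateVT;
  MVT RegisterVT;
  unsigned NumIntermediates;
  unsigned NumRegs = TLI.getVectorTypeBreakdown(Ctx, VT, IntermediateVT,
                                                NumIntermediates, RegisterVT);
  // Expected: NumIntermediates == 3 and IntermediateVT == nxv2i64,
  // i.e. three <vscale x 2 x i64> parts.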
Reviewers: efriedma, c-rhodes
Reviewed By: efriedma
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D80139
Added:
llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll
Modified:
llvm/lib/CodeGen/TargetLoweringBase.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index acc2dce2a0e8..6ec6498369cc 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -955,6 +955,12 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
unsigned NumVectorRegs = 1;
+ // Scalable vectors cannot be scalarized, so splitting or widening is
+ // required.
+ if (VT.isScalableVector() && !isPowerOf2_32(EC.Min))
+ llvm_unreachable(
+ "Splitting or widening of non-power-of-2 MVTs is not implemented.");
+
// FIXME: We don't support non-power-of-2-sized vectors for now.
// Ideally we could break down into LHS/RHS like LegalizeDAG does.
if (!isPowerOf2_32(EC.Min)) {
@@ -1418,8 +1424,34 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
unsigned NumVectorRegs = 1;
- // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
- // could break down into LHS/RHS like LegalizeDAG does.
+ // Scalable vectors cannot be scalarized, so handle the legalisation of
+ // the types as is done elsewhere in SelectionDAG.
+ if (VT.isScalableVector() && !isPowerOf2_32(EltCnt.Min)) {
+ LegalizeKind LK;
+ EVT PartVT = VT;
+ do {
+ // Iterate until we've found a legal (part) type to hold VT.
+ LK = getTypeConversion(Context, PartVT);
+ PartVT = LK.second;
+ } while (LK.first != TypeLegal);
+
+ NumIntermediates =
+ VT.getVectorElementCount().Min / PartVT.getVectorElementCount().Min;
+
+ // FIXME: This code needs to be extended to handle more complex vector
+ // breakdowns, like nxv7i64 -> nxv8i64 -> 4 x nxv2i64. Currently the only
+ // supported cases are vectors that are broken down into equal parts
+ // such as nxv6i64 -> 3 x nxv2i64.
+ assert(NumIntermediates * PartVT.getVectorElementCount().Min ==
+ VT.getVectorElementCount().Min &&
+ "Expected an integer multiple of PartVT");
+ IntermediateVT = PartVT;
+ RegisterVT = getRegisterType(Context, IntermediateVT);
+ return NumIntermediates;
+ }
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally
+ // we could break down into LHS/RHS like LegalizeDAG does.
if (!isPowerOf2_32(EltCnt.Min)) {
NumVectorRegs = EltCnt.Min;
EltCnt.Min = 1;
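
The loop above leans on getTypeConversion() to walk the legalization chain
one step at a time until it reaches a legal part type, then derives the part
count from the element-count ratio. A minimal self-contained sketch of that
iterate-until-legal idea (the NextStep/MinElts tables and type names are
illustrative stand-ins, not the LLVM API):

  #include <cassert>
  #include <map>
  #include <string>

  int main() {
    // Hypothetical legalization table: each type maps to the next step in
    // its chain; a type that maps to itself is already legal.
    std::map<std::string, std::string> NextStep = {
        {"nxv6i64", "nxv2i64"}, // split down to the legal SVE type
        {"nxv2i64", "nxv2i64"}, // legal: one Z register holds nxv2i64
    };
    // Minimum element counts (the EC.Min of each scalable type).
    std::map<std::string, unsigned> MinElts = {{"nxv6i64", 6}, {"nxv2i64", 2}};

    std::string PartVT = "nxv6i64";
    while (NextStep[PartVT] != PartVT) // iterate until the type is legal
      PartVT = NextStep[PartVT];

    // Only equal-part breakdowns are handled, mirroring the assert above.
    unsigned NumIntermediates = MinElts["nxv6i64"] / MinElts[PartVT];
    assert(NumIntermediates * MinElts[PartVT] == MinElts["nxv6i64"]);
    assert(NumIntermediates == 3); // nxv6i64 -> 3 x nxv2i64
    return 0;
  }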
diff --git a/llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll b/llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll
new file mode 100644
index 000000000000..14123b476704
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll
@@ -0,0 +1,312 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
+
+; Test that scalable vectors that are a multiple of the legal vector size
+; can be properly broken down into part vectors.
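+;
+; In each test below, %legal arrives in z0 and the parts of %illegal arrive
+; in z1..zN, while the return value is expected in z0..z(N-1). The CHECK'd
+; mov instructions therefore shift every part down by one Z register
+; (scalable vector arguments are passed in z0-z7 under the SVE calling
+; convention).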
+
+declare void @bar()
+
+;
+; Vectors twice the size
+;
+
+define <vscale x 32 x i8> @wide_32i8(i1 %b, <vscale x 16 x i8> %legal, <vscale x 32 x i8> %illegal) nounwind {
+; CHECK-LABEL: wide_32i8
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 32 x i8> undef
+L2:
+ ret <vscale x 32 x i8> %illegal
+}
+
+define <vscale x 16 x i16> @wide_16i16(i1 %b, <vscale x 16 x i8> %legal, <vscale x 16 x i16> %illegal) nounwind {
+; CHECK-LABEL: wide_16i16
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 16 x i16> undef
+L2:
+ ret <vscale x 16 x i16> %illegal
+}
+
+define <vscale x 8 x i32> @wide_8i32(i1 %b, <vscale x 16 x i8> %legal, <vscale x 8 x i32> %illegal) nounwind {
+; CHECK-LABEL: wide_8i32
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 8 x i32> undef
+L2:
+ ret <vscale x 8 x i32> %illegal
+}
+
+define <vscale x 4 x i64> @wide_4i64(i1 %b, <vscale x 16 x i8> %legal, <vscale x 4 x i64> %illegal) nounwind {
+; CHECK-LABEL: wide_4i64
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 4 x i64> undef
+L2:
+ ret <vscale x 4 x i64> %illegal
+}
+
+define <vscale x 16 x half> @wide_16f16(i1 %b, <vscale x 16 x i8> %legal, <vscale x 16 x half> %illegal) nounwind {
+; CHECK-LABEL: wide_16f16
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 16 x half> undef
+L2:
+ ret <vscale x 16 x half> %illegal
+}
+
+define <vscale x 8 x float> @wide_8f32(i1 %b, <vscale x 16 x i8> %legal, <vscale x 8 x float> %illegal) nounwind {
+; CHECK-LABEL: wide_8f32
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 8 x float> undef
+L2:
+ ret <vscale x 8 x float> %illegal
+}
+
+define <vscale x 4 x double> @wide_4f64(i1 %b, <vscale x 16 x i8> %legal, <vscale x 4 x double> %illegal) nounwind {
+; CHECK-LABEL: wide_4f64
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 4 x double> undef
+L2:
+ ret <vscale x 4 x double> %illegal
+}
+
+;
+; Vectors three times the size
+;
+
+define <vscale x 48 x i8> @wide_48i8(i1 %b, <vscale x 16 x i8> %legal, <vscale x 48 x i8> %illegal) nounwind {
+; CHECK-LABEL: wide_48i8
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 48 x i8> undef
+L2:
+ ret <vscale x 48 x i8> %illegal
+}
+
+define <vscale x 24 x i16> @wide_24i16(i1 %b, <vscale x 16 x i8> %legal, <vscale x 24 x i16> %illegal) nounwind {
+; CHECK-LABEL: wide_24i16
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 24 x i16> undef
+L2:
+ ret <vscale x 24 x i16> %illegal
+}
+
+define <vscale x 12 x i32> @wide_12i32(i1 %b, <vscale x 16 x i8> %legal, <vscale x 12 x i32> %illegal) nounwind {
+; CHECK-LABEL: wide_12i32
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 12 x i32> undef
+L2:
+ ret <vscale x 12 x i32> %illegal
+}
+
+define <vscale x 6 x i64> @wide_6i64(i1 %b, <vscale x 16 x i8> %legal, <vscale x 6 x i64> %illegal) nounwind {
+; CHECK-LABEL: wide_6i64
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 6 x i64> undef
+L2:
+ ret <vscale x 6 x i64> %illegal
+}
+
+define <vscale x 24 x half> @wide_24f16(i1 %b, <vscale x 16 x i8> %legal, <vscale x 24 x half> %illegal) nounwind {
+; CHECK-LABEL: wide_24f16
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 24 x half> undef
+L2:
+ ret <vscale x 24 x half> %illegal
+}
+
+define <vscale x 12 x float> @wide_12f32(i1 %b, <vscale x 16 x i8> %legal, <vscale x 12 x float> %illegal) nounwind {
+; CHECK-LABEL: wide_12f32
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 12 x float> undef
+L2:
+ ret <vscale x 12 x float> %illegal
+}
+
+define <vscale x 6 x double> @wide_6f64(i1 %b, <vscale x 16 x i8> %legal, <vscale x 6 x double> %illegal) nounwind {
+; CHECK-LABEL: wide_6f64
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 6 x double> undef
+L2:
+ ret <vscale x 6 x double> %illegal
+}
+
+;
+; Vectors four times the size
+;
+
+define <vscale x 64 x i8> @wide_64i8(i1 %b, <vscale x 16 x i8> %legal, <vscale x 64 x i8> %illegal) nounwind {
+; CHECK-LABEL: wide_64i8
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: mov z3.d, z4.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 64 x i8> undef
+L2:
+ ret <vscale x 64 x i8> %illegal
+}
+
+define <vscale x 32 x i16> @wide_32i16(i1 %b, <vscale x 16 x i8> %legal, <vscale x 32 x i16> %illegal) nounwind {
+; CHECK-LABEL: wide_32i16
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: mov z3.d, z4.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 32 x i16> undef
+L2:
+ ret <vscale x 32 x i16> %illegal
+}
+
+define <vscale x 16 x i32> @wide_16i32(i1 %b, <vscale x 16 x i8> %legal, <vscale x 16 x i32> %illegal) nounwind {
+; CHECK-LABEL: wide_16i32
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: mov z3.d, z4.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 16 x i32> undef
+L2:
+ ret <vscale x 16 x i32> %illegal
+}
+
+define <vscale x 8 x i64> @wide_8i64(i1 %b, <vscale x 16 x i8> %legal, <vscale x 8 x i64> %illegal) nounwind {
+; CHECK-LABEL: wide_8i64
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: mov z3.d, z4.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 8 x i64> undef
+L2:
+ ret <vscale x 8 x i64> %illegal
+}
+
+define <vscale x 32 x half> @wide_32f16(i1 %b, <vscale x 16 x i8> %legal, <vscale x 32 x half> %illegal) nounwind {
+; CHECK-LABEL: wide_32f16
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: mov z3.d, z4.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 32 x half> undef
+L2:
+ ret <vscale x 32 x half> %illegal
+}
+
+define <vscale x 16 x float> @wide_16f32(i1 %b, <vscale x 16 x i8> %legal, <vscale x 16 x float> %illegal) nounwind {
+; CHECK-LABEL: wide_16f32
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: mov z3.d, z4.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 16 x float> undef
+L2:
+ ret <vscale x 16 x float> %illegal
+}
+
+define <vscale x 8 x double> @wide_8f64(i1 %b, <vscale x 16 x i8> %legal, <vscale x 8 x double> %illegal) nounwind {
+; CHECK-LABEL: wide_8f64
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: mov z3.d, z4.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 8 x double> undef
+L2:
+ ret <vscale x 8 x double> %illegal
+}