[llvm] ae09670 - [CodeGen][SVE] CopyToReg: Split scalable EVTs that are not powers of 2
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 8 02:40:56 PDT 2020
Author: Sander de Smalen
Date: 2020-06-08T10:39:18+01:00
New Revision: ae09670ee4de461669fc9f09b146ba0a5f0935fb
URL: https://github.com/llvm/llvm-project/commit/ae09670ee4de461669fc9f09b146ba0a5f0935fb
DIFF: https://github.com/llvm/llvm-project/commit/ae09670ee4de461669fc9f09b146ba0a5f0935fb.diff
LOG: [CodeGen][SVE] CopyToReg: Split scalable EVTs that are not powers of 2
Scalable vectors cannot use 'BUILD_VECTOR', so they must be properly
split and widened when passed to CopyToReg/CopyFromReg.
This functionality is added to TargetLoweringBase::getVectorTypeBreakdown().
This patch only adds support for 'splitting' scalable vectors that
are a multiple of some legal type, e.g.
<vscale x 6 x i64> -> 3 x <vscale x 2 x i64>
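As a rough sketch of the intended query (illustrative only; Ctx and TLI
are assumed to be an LLVMContext and the target's TargetLowering, and the
two-argument ElementCount constructor is the one in tree as of this commit):

  // Ask how <vscale x 6 x i64> should be broken down after this patch.
  EVT VT = EVT::getVectorVT(Ctx, MVT::i64, ElementCount(6, /*Scalable=*/true));
  EVT IntermediateVT;
  MVT RegisterVT;
  unsigned NumIntermediates;
  unsigned NumRegs = TLI.getVectorTypeBreakdown(Ctx, VT, IntermediateVT,
                                                NumIntermediates, RegisterVT);
  // Expected: NumIntermediates == 3 and IntermediateVT == nxv2i64,
  // i.e. three <vscale x 2 x i64> parts.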
Reviewers: efriedma, c-rhodes
Reviewed By: efriedma
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D80139
Added:
llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll
Modified:
llvm/lib/CodeGen/TargetLoweringBase.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index acc2dce2a0e8..6ec6498369cc 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -955,6 +955,12 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
unsigned NumVectorRegs = 1;
+ // Scalable vectors cannot be scalarized, so splitting or widening is
+ // required.
+ if (VT.isScalableVector() && !isPowerOf2_32(EC.Min))
+ llvm_unreachable(
+ "Splitting or widening of non-power-of-2 MVTs is not implemented.");
+
// FIXME: We don't support non-power-of-2-sized vectors for now.
// Ideally we could break down into LHS/RHS like LegalizeDAG does.
if (!isPowerOf2_32(EC.Min)) {
@@ -1418,8 +1424,34 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
unsigned NumVectorRegs = 1;
- // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
- // could break down into LHS/RHS like LegalizeDAG does.
+ // Scalable vectors cannot be scalarized, so handle the legalisation of
+ // the types as is done elsewhere in SelectionDAG.
+ if (VT.isScalableVector() && !isPowerOf2_32(EltCnt.Min)) {
+ LegalizeKind LK;
+ EVT PartVT = VT;
+ do {
+ // Iterate until we've found a legal (part) type to hold VT.
+ LK = getTypeConversion(Context, PartVT);
+ PartVT = LK.second;
+ } while (LK.first != TypeLegal);
+
+ NumIntermediates =
+ VT.getVectorElementCount().Min / PartVT.getVectorElementCount().Min;
+
+ // FIXME: This code needs to be extended to handle more complex vector
+ // breakdowns, like nxv7i64 -> nxv8i64 -> 4 x nxv2i64. Currently the only
+ // supported cases are vectors that are broken down into equal parts
+ // such as nxv6i64 -> 3 x nxv2i64.
+ assert(NumIntermediates * PartVT.getVectorElementCount().Min ==
+ VT.getVectorElementCount().Min &&
+ "Expected an integer multiple of PartVT");
+ IntermediateVT = PartVT;
+ RegisterVT = getRegisterType(Context, IntermediateVT);
+ return NumIntermediates;
+ }
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally
+ // we could break down into LHS/RHS like LegalizeDAG does.
if (!isPowerOf2_32(EltCnt.Min)) {
NumVectorRegs = EltCnt.Min;
EltCnt.Min = 1;
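
The loop above leans on getTypeConversion() to walk the legalization chain
one step at a time until it reaches a legal part type, then derives the part
count from the element-count ratio. A minimal self-contained sketch of that
iterate-until-legal idea (the NextStep/MinElts tables and type names are
illustrative stand-ins, not the LLVM API):

  #include <cassert>
  #include <map>
  #include <string>

  int main() {
    // Hypothetical legalization table: each type maps to the next step in
    // its chain; a type that maps to itself is already legal.
    std::map<std::string, std::string> NextStep = {
        {"nxv6i64", "nxv2i64"}, // split down to the legal SVE type
        {"nxv2i64", "nxv2i64"}, // legal: one Z register holds nxv2i64
    };
    // Minimum element counts (the EC.Min of each scalable type).
    std::map<std::string, unsigned> MinElts = {{"nxv6i64", 6}, {"nxv2i64", 2}};

    std::string PartVT = "nxv6i64";
    while (NextStep[PartVT] != PartVT) // iterate until the type is legal
      PartVT = NextStep[PartVT];

    // Only equal-part breakdowns are handled, mirroring the assert above.
    unsigned NumIntermediates = MinElts["nxv6i64"] / MinElts[PartVT];
    assert(NumIntermediates * MinElts[PartVT] == MinElts["nxv6i64"]);
    assert(NumIntermediates == 3); // nxv6i64 -> 3 x nxv2i64
    return 0;
  }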
diff --git a/llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll b/llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll
new file mode 100644
index 000000000000..14123b476704
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll
@@ -0,0 +1,312 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
+
+; Test that scalable vectors that are a multiple of the legal vector size
+; can be properly broken down into part vectors.
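+;
+; In each test below, %legal arrives in z0 and the parts of %illegal arrive
+; in z1..zN, while the return value is expected in z0..z(N-1). The CHECK'd
+; mov instructions therefore shift every part down by one Z register
+; (scalable vector arguments are passed in z0-z7 under the SVE calling
+; convention).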
+
+declare void @bar()
+
+;
+; Vectors twice the size
+;
+
+define <vscale x 32 x i8> @wide_32i8(i1 %b, <vscale x 16 x i8> %legal, <vscale x 32 x i8> %illegal) nounwind {
+; CHECK-LABEL: wide_32i8
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 32 x i8> undef
+L2:
+ ret <vscale x 32 x i8> %illegal
+}
+
+define <vscale x 16 x i16> @wide_16i16(i1 %b, <vscale x 16 x i8> %legal, <vscale x 16 x i16> %illegal) nounwind {
+; CHECK-LABEL: wide_16i16
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 16 x i16> undef
+L2:
+ ret <vscale x 16 x i16> %illegal
+}
+
+define <vscale x 8 x i32> @wide_8i32(i1 %b, <vscale x 16 x i8> %legal, <vscale x 8 x i32> %illegal) nounwind {
+; CHECK-LABEL: wide_8i32
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 8 x i32> undef
+L2:
+ ret <vscale x 8 x i32> %illegal
+}
+
+define <vscale x 4 x i64> @wide_4i64(i1 %b, <vscale x 16 x i8> %legal, <vscale x 4 x i64> %illegal) nounwind {
+; CHECK-LABEL: wide_4i64
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 4 x i64> undef
+L2:
+ ret <vscale x 4 x i64> %illegal
+}
+
+define <vscale x 16 x half> @wide_16f16(i1 %b, <vscale x 16 x i8> %legal, <vscale x 16 x half> %illegal) nounwind {
+; CHECK-LABEL: wide_16f16
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 16 x half> undef
+L2:
+ ret <vscale x 16 x half> %illegal
+}
+
+define <vscale x 8 x float> @wide_8f32(i1 %b, <vscale x 16 x i8> %legal, <vscale x 8 x float> %illegal) nounwind {
+; CHECK-LABEL: wide_8f32
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 8 x float> undef
+L2:
+ ret <vscale x 8 x float> %illegal
+}
+
+define <vscale x 4 x double> @wide_4f64(i1 %b, <vscale x 16 x i8> %legal, <vscale x 4 x double> %illegal) nounwind {
+; CHECK-LABEL: wide_4f64
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 4 x double> undef
+L2:
+ ret <vscale x 4 x double> %illegal
+}
+
+;
+; Vectors three times the size
+;
+
+define <vscale x 48 x i8> @wide_48i8(i1 %b, <vscale x 16 x i8> %legal, <vscale x 48 x i8> %illegal) nounwind {
+; CHECK-LABEL: wide_48i8
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 48 x i8> undef
+L2:
+ ret <vscale x 48 x i8> %illegal
+}
+
+define <vscale x 24 x i16> @wide_24i16(i1 %b, <vscale x 16 x i8> %legal, <vscale x 24 x i16> %illegal) nounwind {
+; CHECK-LABEL: wide_24i16
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 24 x i16> undef
+L2:
+ ret <vscale x 24 x i16> %illegal
+}
+
+define <vscale x 12 x i32> @wide_12i32(i1 %b, <vscale x 16 x i8> %legal, <vscale x 12 x i32> %illegal) nounwind {
+; CHECK-LABEL: wide_12i32
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 12 x i32> undef
+L2:
+ ret <vscale x 12 x i32> %illegal
+}
+
+define <vscale x 6 x i64> @wide_6i64(i1 %b, <vscale x 16 x i8> %legal, <vscale x 6 x i64> %illegal) nounwind {
+; CHECK-LABEL: wide_6i64
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 6 x i64> undef
+L2:
+ ret <vscale x 6 x i64> %illegal
+}
+
+define <vscale x 24 x half> @wide_24f16(i1 %b, <vscale x 16 x i8> %legal, <vscale x 24 x half> %illegal) nounwind {
+; CHECK-LABEL: wide_24f16
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 24 x half> undef
+L2:
+ ret <vscale x 24 x half> %illegal
+}
+
+define <vscale x 12 x float> @wide_12f32(i1 %b, <vscale x 16 x i8> %legal, <vscale x 12 x float> %illegal) nounwind {
+; CHECK-LABEL: wide_12f32
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 12 x float> undef
+L2:
+ ret <vscale x 12 x float> %illegal
+}
+
+define <vscale x 6 x double> @wide_6f64(i1 %b, <vscale x 16 x i8> %legal, <vscale x 6 x double> %illegal) nounwind {
+; CHECK-LABEL: wide_6f64
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 6 x double> undef
+L2:
+ ret <vscale x 6 x double> %illegal
+}
+
+;
+; Vectors four times the size
+;
+
+define <vscale x 64 x i8> @wide_64i8(i1 %b, <vscale x 16 x i8> %legal, <vscale x 64 x i8> %illegal) nounwind {
+; CHECK-LABEL: wide_64i8
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: mov z3.d, z4.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 64 x i8> undef
+L2:
+ ret <vscale x 64 x i8> %illegal
+}
+
+define <vscale x 32 x i16> @wide_32i16(i1 %b, <vscale x 16 x i8> %legal, <vscale x 32 x i16> %illegal) nounwind {
+; CHECK-LABEL: wide_32i16
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: mov z3.d, z4.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 32 x i16> undef
+L2:
+ ret <vscale x 32 x i16> %illegal
+}
+
+define <vscale x 16 x i32> @wide_16i32(i1 %b, <vscale x 16 x i8> %legal, <vscale x 16 x i32> %illegal) nounwind {
+; CHECK-LABEL: wide_16i32
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: mov z3.d, z4.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 16 x i32> undef
+L2:
+ ret <vscale x 16 x i32> %illegal
+}
+
+define <vscale x 8 x i64> @wide_8i64(i1 %b, <vscale x 16 x i8> %legal, <vscale x 8 x i64> %illegal) nounwind {
+; CHECK-LABEL: wide_8i64
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: mov z3.d, z4.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 8 x i64> undef
+L2:
+ ret <vscale x 8 x i64> %illegal
+}
+
+define <vscale x 32 x half> @wide_32f16(i1 %b, <vscale x 16 x i8> %legal, <vscale x 32 x half> %illegal) nounwind {
+; CHECK-LABEL: wide_32f16
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: mov z3.d, z4.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 32 x half> undef
+L2:
+ ret <vscale x 32 x half> %illegal
+}
+
+define <vscale x 16 x float> @wide_16f32(i1 %b, <vscale x 16 x i8> %legal, <vscale x 16 x float> %illegal) nounwind {
+; CHECK-LABEL: wide_16f32
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: mov z3.d, z4.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 16 x float> undef
+L2:
+ ret <vscale x 16 x float> %illegal
+}
+
+define <vscale x 8 x double> @wide_8f64(i1 %b, <vscale x 16 x i8> %legal, <vscale x 8 x double> %illegal) nounwind {
+; CHECK-LABEL: wide_8f64
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z3.d
+; CHECK-NEXT: mov z3.d, z4.d
+; CHECK-NEXT: ret
+ br i1 %b, label %L1, label %L2
+L1:
+ call void @bar()
+ ret <vscale x 8 x double> undef
+L2:
+ ret <vscale x 8 x double> %illegal
+}